aboutsummaryrefslogtreecommitdiff
path: root/src/Specific
diff options
context:
space:
mode:
authorGravatar Jason Gross <jgross@mit.edu>2017-11-02 01:36:04 -0400
committerGravatar Jason Gross <jgross@mit.edu>2017-11-02 01:36:04 -0400
commit5b45aa93efa19d54e50299ff0df8ee46c60f891b (patch)
tree9cea98ab783f557b5cbca0456a4dc3b4498e43c5 /src/Specific
parente8bda9b779d5762c5868cd09c85142151655d5ca (diff)
Update display logs and c files
Diffstat (limited to 'src/Specific')
-rw-r--r--src/Specific/montgomery32_2e127m1/feadd.c38
-rw-r--r--src/Specific/montgomery32_2e127m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e127m1/femul.c136
-rw-r--r--src/Specific/montgomery32_2e127m1/femul.h6
-rw-r--r--src/Specific/montgomery32_2e127m1/fenz.c25
-rw-r--r--src/Specific/montgomery32_2e127m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e127m1/feopp.c38
-rw-r--r--src/Specific/montgomery32_2e127m1/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e127m1/fesub.c38
-rw-r--r--src/Specific/montgomery32_2e127m1/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e129m25/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e129m25/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e129m25/femul.c190
-rw-r--r--src/Specific/montgomery32_2e129m25/femul.h6
-rw-r--r--src/Specific/montgomery32_2e129m25/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e129m25/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e129m25/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e129m25/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e129m25/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e129m25/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e130m5/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e130m5/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e130m5/femul.c39
-rw-r--r--src/Specific/montgomery32_2e130m5/femul.h6
-rw-r--r--src/Specific/montgomery32_2e130m5/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e130m5/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e130m5/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e130m5/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e130m5/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e130m5/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e137m13/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e137m13/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e137m13/femul.c200
-rw-r--r--src/Specific/montgomery32_2e137m13/femul.h6
-rw-r--r--src/Specific/montgomery32_2e137m13/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e137m13/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e137m13/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e137m13/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e137m13/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e137m13/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e140m27/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e140m27/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e140m27/femul.c200
-rw-r--r--src/Specific/montgomery32_2e140m27/femul.h6
-rw-r--r--src/Specific/montgomery32_2e140m27/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e140m27/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e140m27/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e140m27/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e140m27/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e140m27/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e141m9/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e141m9/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e141m9/femul.c200
-rw-r--r--src/Specific/montgomery32_2e141m9/femul.h6
-rw-r--r--src/Specific/montgomery32_2e141m9/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e141m9/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e141m9/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e141m9/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e141m9/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e141m9/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e150m3/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e150m3/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e150m3/femul.c200
-rw-r--r--src/Specific/montgomery32_2e150m3/femul.h6
-rw-r--r--src/Specific/montgomery32_2e150m3/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e150m3/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e150m3/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e150m3/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e150m3/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e150m3/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e150m5/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e150m5/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e150m5/femul.c200
-rw-r--r--src/Specific/montgomery32_2e150m5/femul.h6
-rw-r--r--src/Specific/montgomery32_2e150m5/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e150m5/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e150m5/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e150m5/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e150m5/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e150m5/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e152m17/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e152m17/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e152m17/femul.c200
-rw-r--r--src/Specific/montgomery32_2e152m17/femul.h6
-rw-r--r--src/Specific/montgomery32_2e152m17/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e152m17/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e152m17/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e152m17/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e152m17/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e152m17/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e158m15/feadd.c42
-rw-r--r--src/Specific/montgomery32_2e158m15/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e158m15/femul.c200
-rw-r--r--src/Specific/montgomery32_2e158m15/femul.h6
-rw-r--r--src/Specific/montgomery32_2e158m15/fenz.c26
-rw-r--r--src/Specific/montgomery32_2e158m15/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e158m15/feopp.c42
-rw-r--r--src/Specific/montgomery32_2e158m15/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e158m15/fesub.c42
-rw-r--r--src/Specific/montgomery32_2e158m15/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e165m25/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e165m25/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e165m25/femul.c42
-rw-r--r--src/Specific/montgomery32_2e165m25/femul.h6
-rw-r--r--src/Specific/montgomery32_2e165m25/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e165m25/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e165m25/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e165m25/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e165m25/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e165m25/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e166m5/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e166m5/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e166m5/femul.c42
-rw-r--r--src/Specific/montgomery32_2e166m5/femul.h6
-rw-r--r--src/Specific/montgomery32_2e166m5/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e166m5/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e166m5/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e166m5/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e166m5/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e166m5/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e171m19/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e171m19/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e171m19/femul.c272
-rw-r--r--src/Specific/montgomery32_2e171m19/femul.h6
-rw-r--r--src/Specific/montgomery32_2e171m19/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e171m19/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e171m19/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e171m19/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e171m19/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e171m19/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e174m17/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e174m17/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e174m17/femul.c272
-rw-r--r--src/Specific/montgomery32_2e174m17/femul.h6
-rw-r--r--src/Specific/montgomery32_2e174m17/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e174m17/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e174m17/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e174m17/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e174m17/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e174m17/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e174m3/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e174m3/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e174m3/femul.c272
-rw-r--r--src/Specific/montgomery32_2e174m3/femul.h6
-rw-r--r--src/Specific/montgomery32_2e174m3/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e174m3/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e174m3/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e174m3/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e174m3/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e174m3/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e189m25/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e189m25/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e189m25/femul.c272
-rw-r--r--src/Specific/montgomery32_2e189m25/femul.h6
-rw-r--r--src/Specific/montgomery32_2e189m25/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e189m25/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e189m25/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e189m25/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e189m25/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e189m25/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e190m11/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e190m11/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e190m11/femul.c272
-rw-r--r--src/Specific/montgomery32_2e190m11/femul.h6
-rw-r--r--src/Specific/montgomery32_2e190m11/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e190m11/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e190m11/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e190m11/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e190m11/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e190m11/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e191m19/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e191m19/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e191m19/femul.c272
-rw-r--r--src/Specific/montgomery32_2e191m19/femul.h6
-rw-r--r--src/Specific/montgomery32_2e191m19/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e191m19/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e191m19/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e191m19/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e191m19/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e191m19/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/feadd.c46
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/femul.c266
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/femul.h6
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/fenz.c27
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/feopp.c46
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/feopp.h6
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/fesub.c46
-rw-r--r--src/Specific/montgomery32_2e192m2e64m1/fesub.h6
-rw-r--r--src/Specific/montgomery32_2e194m33/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e194m33/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e194m33/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e194m33/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e196m15/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e196m15/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e196m15/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e196m15/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e198m17/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e198m17/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e198m17/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e198m17/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e205m45x2e198m1/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e205m45x2e198m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e205m45x2e198m1/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e205m45x2e198m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e206m5/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e206m5/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e206m5/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e206m5/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e212m29/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e212m29/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e212m29/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e212m29/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e213m3/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e213m3/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e213m3/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e213m3/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e216m2e108m1/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e216m2e108m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e216m2e108m1/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e216m2e108m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e221m3/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e221m3/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e221m3/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e221m3/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e222m117/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e222m117/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e222m117/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e222m117/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/feadd.c50
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/femul.c328
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/femul.h6
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/fenz.c28
-rw-r--r--src/Specific/montgomery32_2e224m2e96p1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e226m5/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e226m5/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e226m5/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e226m5/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e230m27/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e230m27/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e230m27/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e230m27/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e235m15/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e235m15/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e235m15/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e235m15/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e243m9/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e243m9/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e243m9/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e243m9/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e251m9/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e251m9/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e251m9/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e251m9/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e254m127x2e240m1/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e254m127x2e240m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e254m127x2e240m1/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e254m127x2e240m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e255m19/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e255m19/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e255m19/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e255m19/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e255m765/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e255m765/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e255m765/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e255m765/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e256m189/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e256m189/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e256m189/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e256m189/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e256m2e32m977/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e256m2e32m977/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e256m2e32m977/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e256m2e32m977/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e256m88x2e240m1/feadd.c54
-rw-r--r--src/Specific/montgomery32_2e256m88x2e240m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e256m88x2e240m1/fenz.c29
-rw-r--r--src/Specific/montgomery32_2e256m88x2e240m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e266m3/feadd.c58
-rw-r--r--src/Specific/montgomery32_2e266m3/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e266m3/fenz.c30
-rw-r--r--src/Specific/montgomery32_2e266m3/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e285m9/feadd.c58
-rw-r--r--src/Specific/montgomery32_2e285m9/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e285m9/fenz.c30
-rw-r--r--src/Specific/montgomery32_2e285m9/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e291m19/feadd.c62
-rw-r--r--src/Specific/montgomery32_2e291m19/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e291m19/fenz.c31
-rw-r--r--src/Specific/montgomery32_2e291m19/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e321m9/feadd.c66
-rw-r--r--src/Specific/montgomery32_2e321m9/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e321m9/fenz.c32
-rw-r--r--src/Specific/montgomery32_2e321m9/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e322m2e161m1/feadd.c66
-rw-r--r--src/Specific/montgomery32_2e322m2e161m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e322m2e161m1/fenz.c32
-rw-r--r--src/Specific/montgomery32_2e322m2e161m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e336m17/feadd.c66
-rw-r--r--src/Specific/montgomery32_2e336m17/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e336m17/fenz.c32
-rw-r--r--src/Specific/montgomery32_2e336m17/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e336m3/feadd.c66
-rw-r--r--src/Specific/montgomery32_2e336m3/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e336m3/fenz.c32
-rw-r--r--src/Specific/montgomery32_2e336m3/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e338m15/feadd.c66
-rw-r--r--src/Specific/montgomery32_2e338m15/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e338m15/fenz.c32
-rw-r--r--src/Specific/montgomery32_2e338m15/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e369m25/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e369m25/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e369m25/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e369m25/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e379m19/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e379m19/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e379m19/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e379m19/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e382m105/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e382m105/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e382m105/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e382m105/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e383m187/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e383m187/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e383m187/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e383m187/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e383m31/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e383m31/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e383m31/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e383m31/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e383m421/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e383m421/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e383m421/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e383m421/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e384m317/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e384m317/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e384m317/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e384m317/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e384m5x2e368m1/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e384m5x2e368m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e384m5x2e368m1/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e384m5x2e368m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e384m79x2e376m1/feadd.c70
-rw-r--r--src/Specific/montgomery32_2e384m79x2e376m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e384m79x2e376m1/fenz.c33
-rw-r--r--src/Specific/montgomery32_2e384m79x2e376m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e389m21/feadd.c74
-rw-r--r--src/Specific/montgomery32_2e389m21/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e389m21/fenz.c34
-rw-r--r--src/Specific/montgomery32_2e389m21/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e401m31/feadd.c74
-rw-r--r--src/Specific/montgomery32_2e401m31/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e401m31/fenz.c34
-rw-r--r--src/Specific/montgomery32_2e401m31/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e413m21/feadd.c74
-rw-r--r--src/Specific/montgomery32_2e413m21/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e413m21/fenz.c34
-rw-r--r--src/Specific/montgomery32_2e413m21/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e414m17/feadd.c74
-rw-r--r--src/Specific/montgomery32_2e414m17/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e414m17/fenz.c34
-rw-r--r--src/Specific/montgomery32_2e414m17/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e416m2e208m1/feadd.c74
-rw-r--r--src/Specific/montgomery32_2e416m2e208m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e416m2e208m1/fenz.c34
-rw-r--r--src/Specific/montgomery32_2e416m2e208m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e444m17/feadd.c78
-rw-r--r--src/Specific/montgomery32_2e444m17/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e444m17/fenz.c35
-rw-r--r--src/Specific/montgomery32_2e444m17/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e448m2e224m1/feadd.c78
-rw-r--r--src/Specific/montgomery32_2e448m2e224m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e448m2e224m1/fenz.c35
-rw-r--r--src/Specific/montgomery32_2e448m2e224m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e450m2e225m1/feadd.c82
-rw-r--r--src/Specific/montgomery32_2e450m2e225m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e450m2e225m1/fenz.c36
-rw-r--r--src/Specific/montgomery32_2e450m2e225m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e452m3/feadd.c82
-rw-r--r--src/Specific/montgomery32_2e452m3/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e452m3/fenz.c36
-rw-r--r--src/Specific/montgomery32_2e452m3/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e468m17/feadd.c82
-rw-r--r--src/Specific/montgomery32_2e468m17/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e468m17/fenz.c36
-rw-r--r--src/Specific/montgomery32_2e468m17/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e480m2e240m1/feadd.c82
-rw-r--r--src/Specific/montgomery32_2e480m2e240m1/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e480m2e240m1/fenz.c36
-rw-r--r--src/Specific/montgomery32_2e480m2e240m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e488m17/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e488m17/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e488m17/fenz.c37
-rw-r--r--src/Specific/montgomery32_2e488m17/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e489m21/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e489m21/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e489m21/fenz.c37
-rw-r--r--src/Specific/montgomery32_2e489m21/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e495m31/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e495m31/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e495m31/fenz.c37
-rw-r--r--src/Specific/montgomery32_2e495m31/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e510m290x2e496m1/fenz.c37
-rw-r--r--src/Specific/montgomery32_2e510m290x2e496m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e511m187/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e511m187/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e511m187/fenz.c37
-rw-r--r--src/Specific/montgomery32_2e511m187/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e511m481/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e511m481/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e511m481/feaddDisplay.log2
-rw-r--r--src/Specific/montgomery32_2e511m481/fenz.c37
-rw-r--r--src/Specific/montgomery32_2e511m481/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e512m491x2e496m1/fenz.c37
-rw-r--r--src/Specific/montgomery32_2e512m491x2e496m1/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e512m569/feadd.c86
-rw-r--r--src/Specific/montgomery32_2e512m569/feadd.h6
-rw-r--r--src/Specific/montgomery32_2e512m569/feaddDisplay.log2
-rw-r--r--src/Specific/montgomery32_2e512m569/fenz.c37
-rw-r--r--src/Specific/montgomery32_2e512m569/fenz.h6
-rw-r--r--src/Specific/montgomery32_2e521m1/fenz.c38
-rw-r--r--src/Specific/montgomery32_2e521m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e127m1/feadd.c30
-rw-r--r--src/Specific/montgomery64_2e127m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e127m1/femul.c54
-rw-r--r--src/Specific/montgomery64_2e127m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e127m1/fenz.c23
-rw-r--r--src/Specific/montgomery64_2e127m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e127m1/feopp.c30
-rw-r--r--src/Specific/montgomery64_2e127m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e127m1/fesub.c30
-rw-r--r--src/Specific/montgomery64_2e127m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e129m25/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e129m25/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e129m25/femul.c86
-rw-r--r--src/Specific/montgomery64_2e129m25/femul.h6
-rw-r--r--src/Specific/montgomery64_2e129m25/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e129m25/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e129m25/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e129m25/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e129m25/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e129m25/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e130m5/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e130m5/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e130m5/femul.c33
-rw-r--r--src/Specific/montgomery64_2e130m5/femul.h6
-rw-r--r--src/Specific/montgomery64_2e130m5/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e130m5/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e130m5/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e130m5/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e130m5/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e130m5/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e137m13/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e137m13/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e137m13/femul.c92
-rw-r--r--src/Specific/montgomery64_2e137m13/femul.h6
-rw-r--r--src/Specific/montgomery64_2e137m13/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e137m13/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e137m13/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e137m13/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e137m13/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e137m13/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e140m27/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e140m27/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e140m27/femul.c92
-rw-r--r--src/Specific/montgomery64_2e140m27/femul.h6
-rw-r--r--src/Specific/montgomery64_2e140m27/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e140m27/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e140m27/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e140m27/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e140m27/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e140m27/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e141m9/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e141m9/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e141m9/femul.c92
-rw-r--r--src/Specific/montgomery64_2e141m9/femul.h6
-rw-r--r--src/Specific/montgomery64_2e141m9/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e141m9/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e141m9/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e141m9/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e141m9/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e141m9/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e150m3/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e150m3/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e150m3/femul.c92
-rw-r--r--src/Specific/montgomery64_2e150m3/femul.h6
-rw-r--r--src/Specific/montgomery64_2e150m3/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e150m3/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e150m3/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e150m3/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e150m3/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e150m3/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e150m5/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e150m5/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e150m5/femul.c92
-rw-r--r--src/Specific/montgomery64_2e150m5/femul.h6
-rw-r--r--src/Specific/montgomery64_2e150m5/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e150m5/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e150m5/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e150m5/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e150m5/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e150m5/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e152m17/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e152m17/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e152m17/femul.c92
-rw-r--r--src/Specific/montgomery64_2e152m17/femul.h6
-rw-r--r--src/Specific/montgomery64_2e152m17/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e152m17/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e152m17/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e152m17/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e152m17/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e152m17/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e158m15/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e158m15/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e158m15/femul.c92
-rw-r--r--src/Specific/montgomery64_2e158m15/femul.h6
-rw-r--r--src/Specific/montgomery64_2e158m15/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e158m15/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e158m15/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e158m15/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e158m15/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e158m15/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e165m25/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e165m25/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e165m25/femul.c92
-rw-r--r--src/Specific/montgomery64_2e165m25/femul.h6
-rw-r--r--src/Specific/montgomery64_2e165m25/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e165m25/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e165m25/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e165m25/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e165m25/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e165m25/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e166m5/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e166m5/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e166m5/femul.c92
-rw-r--r--src/Specific/montgomery64_2e166m5/femul.h6
-rw-r--r--src/Specific/montgomery64_2e166m5/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e166m5/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e166m5/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e166m5/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e166m5/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e166m5/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e171m19/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e171m19/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e171m19/femul.c92
-rw-r--r--src/Specific/montgomery64_2e171m19/femul.h6
-rw-r--r--src/Specific/montgomery64_2e171m19/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e171m19/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e171m19/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e171m19/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e171m19/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e171m19/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e174m17/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e174m17/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e174m17/femul.c92
-rw-r--r--src/Specific/montgomery64_2e174m17/femul.h6
-rw-r--r--src/Specific/montgomery64_2e174m17/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e174m17/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e174m17/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e174m17/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e174m17/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e174m17/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e174m3/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e174m3/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e174m3/femul.c92
-rw-r--r--src/Specific/montgomery64_2e174m3/femul.h6
-rw-r--r--src/Specific/montgomery64_2e174m3/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e174m3/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e174m3/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e174m3/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e174m3/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e174m3/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e189m25/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e189m25/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e189m25/femul.c92
-rw-r--r--src/Specific/montgomery64_2e189m25/femul.h6
-rw-r--r--src/Specific/montgomery64_2e189m25/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e189m25/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e189m25/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e189m25/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e189m25/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e189m25/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e190m11/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e190m11/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e190m11/femul.c92
-rw-r--r--src/Specific/montgomery64_2e190m11/femul.h6
-rw-r--r--src/Specific/montgomery64_2e190m11/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e190m11/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e190m11/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e190m11/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e190m11/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e190m11/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e191m19/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e191m19/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e191m19/femul.c92
-rw-r--r--src/Specific/montgomery64_2e191m19/femul.h6
-rw-r--r--src/Specific/montgomery64_2e191m19/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e191m19/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e191m19/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e191m19/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e191m19/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e191m19/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/feadd.c34
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/femul.c89
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/fenz.c24
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/feopp.c34
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/fesub.c34
-rw-r--r--src/Specific/montgomery64_2e192m2e64m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e194m33/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e194m33/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e194m33/femul.c36
-rw-r--r--src/Specific/montgomery64_2e194m33/femul.h6
-rw-r--r--src/Specific/montgomery64_2e194m33/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e194m33/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e194m33/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e194m33/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e194m33/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e194m33/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e196m15/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e196m15/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e196m15/femul.c36
-rw-r--r--src/Specific/montgomery64_2e196m15/femul.h6
-rw-r--r--src/Specific/montgomery64_2e196m15/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e196m15/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e196m15/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e196m15/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e196m15/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e196m15/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e198m17/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e198m17/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e198m17/femul.c36
-rw-r--r--src/Specific/montgomery64_2e198m17/femul.h6
-rw-r--r--src/Specific/montgomery64_2e198m17/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e198m17/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e198m17/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e198m17/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e198m17/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e198m17/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/femul.c136
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e205m45x2e198m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e206m5/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e206m5/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e206m5/femul.c140
-rw-r--r--src/Specific/montgomery64_2e206m5/femul.h6
-rw-r--r--src/Specific/montgomery64_2e206m5/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e206m5/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e206m5/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e206m5/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e206m5/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e206m5/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e212m29/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e212m29/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e212m29/femul.c140
-rw-r--r--src/Specific/montgomery64_2e212m29/femul.h6
-rw-r--r--src/Specific/montgomery64_2e212m29/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e212m29/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e212m29/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e212m29/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e212m29/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e212m29/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e213m3/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e213m3/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e213m3/femul.c140
-rw-r--r--src/Specific/montgomery64_2e213m3/femul.h6
-rw-r--r--src/Specific/montgomery64_2e213m3/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e213m3/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e213m3/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e213m3/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e213m3/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e213m3/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/femul.c136
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e216m2e108m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e221m3/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e221m3/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e221m3/femul.c140
-rw-r--r--src/Specific/montgomery64_2e221m3/femul.h6
-rw-r--r--src/Specific/montgomery64_2e221m3/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e221m3/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e221m3/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e221m3/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e221m3/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e221m3/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e222m117/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e222m117/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e222m117/femul.c140
-rw-r--r--src/Specific/montgomery64_2e222m117/femul.h6
-rw-r--r--src/Specific/montgomery64_2e222m117/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e222m117/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e222m117/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e222m117/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e222m117/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e222m117/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/femul.c132
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e224m2e96p1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e226m5/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e226m5/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e226m5/femul.c140
-rw-r--r--src/Specific/montgomery64_2e226m5/femul.h6
-rw-r--r--src/Specific/montgomery64_2e226m5/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e226m5/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e226m5/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e226m5/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e226m5/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e226m5/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e230m27/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e230m27/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e230m27/femul.c140
-rw-r--r--src/Specific/montgomery64_2e230m27/femul.h6
-rw-r--r--src/Specific/montgomery64_2e230m27/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e230m27/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e230m27/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e230m27/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e230m27/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e230m27/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e235m15/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e235m15/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e235m15/femul.c140
-rw-r--r--src/Specific/montgomery64_2e235m15/femul.h6
-rw-r--r--src/Specific/montgomery64_2e235m15/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e235m15/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e235m15/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e235m15/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e235m15/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e235m15/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e243m9/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e243m9/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e243m9/femul.c140
-rw-r--r--src/Specific/montgomery64_2e243m9/femul.h6
-rw-r--r--src/Specific/montgomery64_2e243m9/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e243m9/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e243m9/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e243m9/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e243m9/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e243m9/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e251m9/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e251m9/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e251m9/femul.c140
-rw-r--r--src/Specific/montgomery64_2e251m9/femul.h6
-rw-r--r--src/Specific/montgomery64_2e251m9/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e251m9/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e251m9/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e251m9/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e251m9/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e251m9/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/femul.c136
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e254m127x2e240m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e255m19/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e255m19/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e255m19/femul.c140
-rw-r--r--src/Specific/montgomery64_2e255m19/femul.h6
-rw-r--r--src/Specific/montgomery64_2e255m19/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e255m19/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e255m19/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e255m19/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e255m19/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e255m19/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c140
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e255m765/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e255m765/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e255m765/femul.c140
-rw-r--r--src/Specific/montgomery64_2e255m765/femul.h6
-rw-r--r--src/Specific/montgomery64_2e255m765/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e255m765/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e255m765/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e255m765/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e255m765/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e255m765/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e256m189/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e256m189/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e256m189/femul.c140
-rw-r--r--src/Specific/montgomery64_2e256m189/femul.h6
-rw-r--r--src/Specific/montgomery64_2e256m189/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e256m189/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e256m189/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e256m189/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e256m189/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e256m189/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c132
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c37
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c37
-rw-r--r--src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/femul.c140
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/femul.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e256m2e32m977/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/feadd.c38
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/femul.c136
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/fenz.c25
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/feopp.c38
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/fesub.c38
-rw-r--r--src/Specific/montgomery64_2e256m88x2e240m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e266m3/feadd.c42
-rw-r--r--src/Specific/montgomery64_2e266m3/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e266m3/femul.c200
-rw-r--r--src/Specific/montgomery64_2e266m3/femul.h6
-rw-r--r--src/Specific/montgomery64_2e266m3/fenz.c26
-rw-r--r--src/Specific/montgomery64_2e266m3/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e266m3/feopp.c42
-rw-r--r--src/Specific/montgomery64_2e266m3/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e266m3/fesub.c42
-rw-r--r--src/Specific/montgomery64_2e266m3/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e285m9/feadd.c42
-rw-r--r--src/Specific/montgomery64_2e285m9/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e285m9/femul.c200
-rw-r--r--src/Specific/montgomery64_2e285m9/femul.h6
-rw-r--r--src/Specific/montgomery64_2e285m9/fenz.c26
-rw-r--r--src/Specific/montgomery64_2e285m9/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e285m9/feopp.c42
-rw-r--r--src/Specific/montgomery64_2e285m9/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e285m9/fesub.c42
-rw-r--r--src/Specific/montgomery64_2e285m9/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e291m19/feadd.c42
-rw-r--r--src/Specific/montgomery64_2e291m19/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e291m19/femul.c200
-rw-r--r--src/Specific/montgomery64_2e291m19/femul.h6
-rw-r--r--src/Specific/montgomery64_2e291m19/fenz.c26
-rw-r--r--src/Specific/montgomery64_2e291m19/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e291m19/feopp.c42
-rw-r--r--src/Specific/montgomery64_2e291m19/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e291m19/fesub.c42
-rw-r--r--src/Specific/montgomery64_2e291m19/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e321m9/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e321m9/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e321m9/femul.c260
-rw-r--r--src/Specific/montgomery64_2e321m9/femul.h6
-rw-r--r--src/Specific/montgomery64_2e321m9/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e321m9/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e321m9/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e321m9/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e321m9/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e321m9/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/femul.c41
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e322m2e161m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e336m17/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e336m17/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e336m17/femul.c272
-rw-r--r--src/Specific/montgomery64_2e336m17/femul.h6
-rw-r--r--src/Specific/montgomery64_2e336m17/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e336m17/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e336m17/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e336m17/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e336m17/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e336m17/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e336m3/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e336m3/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e336m3/femul.c272
-rw-r--r--src/Specific/montgomery64_2e336m3/femul.h6
-rw-r--r--src/Specific/montgomery64_2e336m3/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e336m3/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e336m3/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e336m3/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e336m3/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e336m3/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e338m15/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e338m15/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e338m15/femul.c272
-rw-r--r--src/Specific/montgomery64_2e338m15/femul.h6
-rw-r--r--src/Specific/montgomery64_2e338m15/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e338m15/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e338m15/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e338m15/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e338m15/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e338m15/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e369m25/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e369m25/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e369m25/femul.c272
-rw-r--r--src/Specific/montgomery64_2e369m25/femul.h6
-rw-r--r--src/Specific/montgomery64_2e369m25/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e369m25/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e369m25/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e369m25/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e369m25/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e369m25/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e379m19/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e379m19/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e379m19/femul.c272
-rw-r--r--src/Specific/montgomery64_2e379m19/femul.h6
-rw-r--r--src/Specific/montgomery64_2e379m19/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e379m19/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e379m19/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e379m19/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e379m19/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e379m19/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e382m105/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e382m105/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e382m105/femul.c272
-rw-r--r--src/Specific/montgomery64_2e382m105/femul.h6
-rw-r--r--src/Specific/montgomery64_2e382m105/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e382m105/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e382m105/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e382m105/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e382m105/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e382m105/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e383m187/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e383m187/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e383m187/femul.c272
-rw-r--r--src/Specific/montgomery64_2e383m187/femul.h6
-rw-r--r--src/Specific/montgomery64_2e383m187/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e383m187/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e383m187/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e383m187/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e383m187/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e383m187/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e383m31/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e383m31/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e383m31/femul.c272
-rw-r--r--src/Specific/montgomery64_2e383m31/femul.h6
-rw-r--r--src/Specific/montgomery64_2e383m31/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e383m31/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e383m31/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e383m31/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e383m31/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e383m31/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e383m421/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e383m421/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e383m421/femul.c272
-rw-r--r--src/Specific/montgomery64_2e383m421/femul.h6
-rw-r--r--src/Specific/montgomery64_2e383m421/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e383m421/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e383m421/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e383m421/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e383m421/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e383m421/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c272
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e384m317/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e384m317/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e384m317/femul.c272
-rw-r--r--src/Specific/montgomery64_2e384m317/femul.h6
-rw-r--r--src/Specific/montgomery64_2e384m317/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e384m317/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e384m317/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e384m317/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e384m317/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e384m317/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/femul.c266
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e384m5x2e368m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/feadd.c46
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/femul.c266
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/femul.h6
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/fenz.c27
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/feopp.c46
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/feopp.h6
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/fesub.c46
-rw-r--r--src/Specific/montgomery64_2e384m79x2e376m1/fesub.h6
-rw-r--r--src/Specific/montgomery64_2e389m21/feadd.c50
-rw-r--r--src/Specific/montgomery64_2e389m21/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e389m21/fenz.c28
-rw-r--r--src/Specific/montgomery64_2e389m21/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e401m31/feadd.c50
-rw-r--r--src/Specific/montgomery64_2e401m31/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e401m31/fenz.c28
-rw-r--r--src/Specific/montgomery64_2e401m31/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e413m21/feadd.c50
-rw-r--r--src/Specific/montgomery64_2e413m21/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e413m21/fenz.c28
-rw-r--r--src/Specific/montgomery64_2e413m21/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e414m17/feadd.c50
-rw-r--r--src/Specific/montgomery64_2e414m17/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e414m17/fenz.c28
-rw-r--r--src/Specific/montgomery64_2e414m17/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e416m2e208m1/feadd.c50
-rw-r--r--src/Specific/montgomery64_2e416m2e208m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e416m2e208m1/fenz.c28
-rw-r--r--src/Specific/montgomery64_2e416m2e208m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e444m17/feadd.c50
-rw-r--r--src/Specific/montgomery64_2e444m17/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e444m17/fenz.c28
-rw-r--r--src/Specific/montgomery64_2e444m17/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e448m2e224m1/feadd.c50
-rw-r--r--src/Specific/montgomery64_2e448m2e224m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e448m2e224m1/fenz.c28
-rw-r--r--src/Specific/montgomery64_2e448m2e224m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e450m2e225m1/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e450m2e225m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e450m2e225m1/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e450m2e225m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e452m3/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e452m3/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e452m3/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e452m3/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e468m17/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e468m17/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e468m17/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e468m17/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e480m2e240m1/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e480m2e240m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e480m2e240m1/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e480m2e240m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e488m17/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e488m17/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e488m17/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e488m17/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e489m21/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e489m21/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e489m21/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e489m21/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e495m31/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e495m31/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e495m31/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e495m31/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e510m290x2e496m1/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e510m290x2e496m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e510m290x2e496m1/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e510m290x2e496m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e511m187/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e511m187/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e511m187/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e511m187/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e511m481/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e511m481/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e511m481/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e511m481/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e512m491x2e496m1/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e512m491x2e496m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e512m491x2e496m1/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e512m491x2e496m1/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e512m569/feadd.c54
-rw-r--r--src/Specific/montgomery64_2e512m569/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e512m569/fenz.c29
-rw-r--r--src/Specific/montgomery64_2e512m569/fenz.h6
-rw-r--r--src/Specific/montgomery64_2e521m1/feadd.c58
-rw-r--r--src/Specific/montgomery64_2e521m1/feadd.h6
-rw-r--r--src/Specific/montgomery64_2e521m1/fenz.c30
-rw-r--r--src/Specific/montgomery64_2e521m1/fenz.h6
-rw-r--r--src/Specific/solinas32_2e127m1/femul.c56
-rw-r--r--src/Specific/solinas32_2e127m1/femul.h6
-rw-r--r--src/Specific/solinas32_2e127m1/fesquare.c56
-rw-r--r--src/Specific/solinas32_2e127m1/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e127m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e127m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e129m25/femul.c56
-rw-r--r--src/Specific/solinas32_2e129m25/femul.h6
-rw-r--r--src/Specific/solinas32_2e129m25/fesquare.c56
-rw-r--r--src/Specific/solinas32_2e129m25/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e129m25/freeze.c25
-rw-r--r--src/Specific/solinas32_2e129m25/freeze.h6
-rw-r--r--src/Specific/solinas32_2e130m5/femul.c51
-rw-r--r--src/Specific/solinas32_2e130m5/femul.h6
-rw-r--r--src/Specific/solinas32_2e130m5/fesquare.c51
-rw-r--r--src/Specific/solinas32_2e130m5/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e130m5/freeze.c25
-rw-r--r--src/Specific/solinas32_2e130m5/freeze.h6
-rw-r--r--src/Specific/solinas32_2e137m13/femul.c66
-rw-r--r--src/Specific/solinas32_2e137m13/femul.h6
-rw-r--r--src/Specific/solinas32_2e137m13/fesquare.c66
-rw-r--r--src/Specific/solinas32_2e137m13/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e137m13/freeze.c25
-rw-r--r--src/Specific/solinas32_2e137m13/freeze.h6
-rw-r--r--src/Specific/solinas32_2e140m27/femul.c56
-rw-r--r--src/Specific/solinas32_2e140m27/femul.h6
-rw-r--r--src/Specific/solinas32_2e140m27/fesquare.c56
-rw-r--r--src/Specific/solinas32_2e140m27/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e140m27/freeze.c25
-rw-r--r--src/Specific/solinas32_2e140m27/freeze.h6
-rw-r--r--src/Specific/solinas32_2e141m9/femul.c56
-rw-r--r--src/Specific/solinas32_2e141m9/femul.h6
-rw-r--r--src/Specific/solinas32_2e141m9/fesquare.c56
-rw-r--r--src/Specific/solinas32_2e141m9/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e141m9/freeze.c25
-rw-r--r--src/Specific/solinas32_2e141m9/freeze.h6
-rw-r--r--src/Specific/solinas32_2e150m3/femul.c51
-rw-r--r--src/Specific/solinas32_2e150m3/femul.h6
-rw-r--r--src/Specific/solinas32_2e150m3/fesquare.c51
-rw-r--r--src/Specific/solinas32_2e150m3/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e150m3/freeze.c25
-rw-r--r--src/Specific/solinas32_2e150m3/freeze.h6
-rw-r--r--src/Specific/solinas32_2e150m5/femul.c56
-rw-r--r--src/Specific/solinas32_2e150m5/femul.h6
-rw-r--r--src/Specific/solinas32_2e150m5/fesquare.c56
-rw-r--r--src/Specific/solinas32_2e150m5/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e150m5/freeze.c25
-rw-r--r--src/Specific/solinas32_2e150m5/freeze.h6
-rw-r--r--src/Specific/solinas32_2e152m17/femul.c56
-rw-r--r--src/Specific/solinas32_2e152m17/femul.h6
-rw-r--r--src/Specific/solinas32_2e152m17/fesquare.c56
-rw-r--r--src/Specific/solinas32_2e152m17/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e152m17/freeze.c25
-rw-r--r--src/Specific/solinas32_2e152m17/freeze.h6
-rw-r--r--src/Specific/solinas32_2e158m15/femul.c56
-rw-r--r--src/Specific/solinas32_2e158m15/femul.h6
-rw-r--r--src/Specific/solinas32_2e158m15/fesquare.c56
-rw-r--r--src/Specific/solinas32_2e158m15/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e158m15/freeze.c25
-rw-r--r--src/Specific/solinas32_2e158m15/freeze.h6
-rw-r--r--src/Specific/solinas32_2e165m25/femul.c71
-rw-r--r--src/Specific/solinas32_2e165m25/femul.h6
-rw-r--r--src/Specific/solinas32_2e165m25/fesquare.c71
-rw-r--r--src/Specific/solinas32_2e165m25/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e165m25/freeze.c25
-rw-r--r--src/Specific/solinas32_2e165m25/freeze.h6
-rw-r--r--src/Specific/solinas32_2e166m5/femul.c81
-rw-r--r--src/Specific/solinas32_2e166m5/femul.h6
-rw-r--r--src/Specific/solinas32_2e166m5/fesquare.c81
-rw-r--r--src/Specific/solinas32_2e166m5/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e166m5/freeze.c25
-rw-r--r--src/Specific/solinas32_2e166m5/freeze.h6
-rw-r--r--src/Specific/solinas32_2e171m19/femul.c71
-rw-r--r--src/Specific/solinas32_2e171m19/femul.h6
-rw-r--r--src/Specific/solinas32_2e171m19/fesquare.c71
-rw-r--r--src/Specific/solinas32_2e171m19/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e171m19/freeze.c25
-rw-r--r--src/Specific/solinas32_2e171m19/freeze.h6
-rw-r--r--src/Specific/solinas32_2e174m17/femul.c71
-rw-r--r--src/Specific/solinas32_2e174m17/femul.h6
-rw-r--r--src/Specific/solinas32_2e174m17/fesquare.c71
-rw-r--r--src/Specific/solinas32_2e174m17/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e174m17/freeze.c25
-rw-r--r--src/Specific/solinas32_2e174m17/freeze.h6
-rw-r--r--src/Specific/solinas32_2e174m3/femul.c56
-rw-r--r--src/Specific/solinas32_2e174m3/femul.h6
-rw-r--r--src/Specific/solinas32_2e174m3/fesquare.c56
-rw-r--r--src/Specific/solinas32_2e174m3/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e174m3/freeze.c25
-rw-r--r--src/Specific/solinas32_2e174m3/freeze.h6
-rw-r--r--src/Specific/solinas32_2e189m25/femul.c61
-rw-r--r--src/Specific/solinas32_2e189m25/femul.h6
-rw-r--r--src/Specific/solinas32_2e189m25/fesquare.c61
-rw-r--r--src/Specific/solinas32_2e189m25/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e189m25/freeze.c25
-rw-r--r--src/Specific/solinas32_2e189m25/freeze.h6
-rw-r--r--src/Specific/solinas32_2e190m11/femul.c61
-rw-r--r--src/Specific/solinas32_2e190m11/femul.h6
-rw-r--r--src/Specific/solinas32_2e190m11/fesquare.c61
-rw-r--r--src/Specific/solinas32_2e190m11/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e190m11/freeze.c25
-rw-r--r--src/Specific/solinas32_2e190m11/freeze.h6
-rw-r--r--src/Specific/solinas32_2e191m19/femul.c76
-rw-r--r--src/Specific/solinas32_2e191m19/femul.h6
-rw-r--r--src/Specific/solinas32_2e191m19/fesquare.c76
-rw-r--r--src/Specific/solinas32_2e191m19/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e191m19/freeze.c25
-rw-r--r--src/Specific/solinas32_2e191m19/freeze.h6
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/femul.c77
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/femul.h6
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/fesquare.c77
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e192m2e64m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e194m33/femul.c66
-rw-r--r--src/Specific/solinas32_2e194m33/femul.h6
-rw-r--r--src/Specific/solinas32_2e194m33/fesquare.c66
-rw-r--r--src/Specific/solinas32_2e194m33/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e194m33/freeze.c25
-rw-r--r--src/Specific/solinas32_2e194m33/freeze.h6
-rw-r--r--src/Specific/solinas32_2e196m15/femul.c61
-rw-r--r--src/Specific/solinas32_2e196m15/femul.h6
-rw-r--r--src/Specific/solinas32_2e196m15/fesquare.c61
-rw-r--r--src/Specific/solinas32_2e196m15/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e196m15/freeze.c25
-rw-r--r--src/Specific/solinas32_2e196m15/freeze.h6
-rw-r--r--src/Specific/solinas32_2e198m17/femul.c71
-rw-r--r--src/Specific/solinas32_2e198m17/femul.h6
-rw-r--r--src/Specific/solinas32_2e198m17/fesquare.c71
-rw-r--r--src/Specific/solinas32_2e198m17/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e198m17/freeze.c25
-rw-r--r--src/Specific/solinas32_2e198m17/freeze.h6
-rw-r--r--src/Specific/solinas32_2e205m45x2e198m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e205m45x2e198m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e206m5/femul.c86
-rw-r--r--src/Specific/solinas32_2e206m5/femul.h6
-rw-r--r--src/Specific/solinas32_2e206m5/fesquare.c86
-rw-r--r--src/Specific/solinas32_2e206m5/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e206m5/freeze.c25
-rw-r--r--src/Specific/solinas32_2e206m5/freeze.h6
-rw-r--r--src/Specific/solinas32_2e212m29/femul.c66
-rw-r--r--src/Specific/solinas32_2e212m29/femul.h6
-rw-r--r--src/Specific/solinas32_2e212m29/fesquare.c66
-rw-r--r--src/Specific/solinas32_2e212m29/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e212m29/freeze.c25
-rw-r--r--src/Specific/solinas32_2e212m29/freeze.h6
-rw-r--r--src/Specific/solinas32_2e213m3/femul.c101
-rw-r--r--src/Specific/solinas32_2e213m3/femul.h6
-rw-r--r--src/Specific/solinas32_2e213m3/fesquare.c101
-rw-r--r--src/Specific/solinas32_2e213m3/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e213m3/freeze.c25
-rw-r--r--src/Specific/solinas32_2e213m3/freeze.h6
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/femul.c83
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/femul.h6
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/fesquare.c83
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e216m2e108m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e221m3/femul.c76
-rw-r--r--src/Specific/solinas32_2e221m3/femul.h6
-rw-r--r--src/Specific/solinas32_2e221m3/fesquare.c76
-rw-r--r--src/Specific/solinas32_2e221m3/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e221m3/freeze.c25
-rw-r--r--src/Specific/solinas32_2e221m3/freeze.h6
-rw-r--r--src/Specific/solinas32_2e222m117/femul.c76
-rw-r--r--src/Specific/solinas32_2e222m117/femul.h6
-rw-r--r--src/Specific/solinas32_2e222m117/fesquare.c76
-rw-r--r--src/Specific/solinas32_2e222m117/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e222m117/freeze.c25
-rw-r--r--src/Specific/solinas32_2e222m117/freeze.h6
-rw-r--r--src/Specific/solinas32_2e224m2e96p1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e224m2e96p1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e226m5/femul.c66
-rw-r--r--src/Specific/solinas32_2e226m5/femul.h6
-rw-r--r--src/Specific/solinas32_2e226m5/fesquare.c66
-rw-r--r--src/Specific/solinas32_2e226m5/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e226m5/freeze.c25
-rw-r--r--src/Specific/solinas32_2e226m5/freeze.h6
-rw-r--r--src/Specific/solinas32_2e230m27/femul.c76
-rw-r--r--src/Specific/solinas32_2e230m27/femul.h6
-rw-r--r--src/Specific/solinas32_2e230m27/fesquare.c76
-rw-r--r--src/Specific/solinas32_2e230m27/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e230m27/freeze.c25
-rw-r--r--src/Specific/solinas32_2e230m27/freeze.h6
-rw-r--r--src/Specific/solinas32_2e235m15/femul.c71
-rw-r--r--src/Specific/solinas32_2e235m15/femul.h6
-rw-r--r--src/Specific/solinas32_2e235m15/fesquare.c71
-rw-r--r--src/Specific/solinas32_2e235m15/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e235m15/freeze.c25
-rw-r--r--src/Specific/solinas32_2e235m15/freeze.h6
-rw-r--r--src/Specific/solinas32_2e243m9/femul.c71
-rw-r--r--src/Specific/solinas32_2e243m9/femul.h6
-rw-r--r--src/Specific/solinas32_2e243m9/fesquare.c71
-rw-r--r--src/Specific/solinas32_2e243m9/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e243m9/freeze.c25
-rw-r--r--src/Specific/solinas32_2e243m9/freeze.h6
-rw-r--r--src/Specific/solinas32_2e251m9/femul.c76
-rw-r--r--src/Specific/solinas32_2e251m9/femul.h6
-rw-r--r--src/Specific/solinas32_2e251m9/fesquare.c76
-rw-r--r--src/Specific/solinas32_2e251m9/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e251m9/freeze.c25
-rw-r--r--src/Specific/solinas32_2e251m9/freeze.h6
-rw-r--r--src/Specific/solinas32_2e254m127x2e240m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e254m127x2e240m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e255m19/femul.c76
-rw-r--r--src/Specific/solinas32_2e255m19/femul.h6
-rw-r--r--src/Specific/solinas32_2e255m19/fesquare.c76
-rw-r--r--src/Specific/solinas32_2e255m19/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e255m19/freeze.c25
-rw-r--r--src/Specific/solinas32_2e255m19/freeze.h6
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/femul.c84
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/femul.h6
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c84
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e255m2e4m2e1m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e255m765/femul.c86
-rw-r--r--src/Specific/solinas32_2e255m765/femul.h6
-rw-r--r--src/Specific/solinas32_2e255m765/fesquare.c86
-rw-r--r--src/Specific/solinas32_2e255m765/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e255m765/freeze.c25
-rw-r--r--src/Specific/solinas32_2e255m765/freeze.h6
-rw-r--r--src/Specific/solinas32_2e256m189/femul.c86
-rw-r--r--src/Specific/solinas32_2e256m189/femul.h6
-rw-r--r--src/Specific/solinas32_2e256m189/fesquare.c86
-rw-r--r--src/Specific/solinas32_2e256m189/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e256m189/freeze.c25
-rw-r--r--src/Specific/solinas32_2e256m189/freeze.h6
-rw-r--r--src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/femul.c97
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/femul.h6
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/fesquare.c97
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/freeze.c25
-rw-r--r--src/Specific/solinas32_2e256m2e32m977/freeze.h6
-rw-r--r--src/Specific/solinas32_2e256m88x2e240m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e256m88x2e240m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e266m3/femul.c86
-rw-r--r--src/Specific/solinas32_2e266m3/femul.h6
-rw-r--r--src/Specific/solinas32_2e266m3/fesquare.c86
-rw-r--r--src/Specific/solinas32_2e266m3/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e266m3/freeze.c25
-rw-r--r--src/Specific/solinas32_2e266m3/freeze.h6
-rw-r--r--src/Specific/solinas32_2e285m9/femul.c101
-rw-r--r--src/Specific/solinas32_2e285m9/femul.h6
-rw-r--r--src/Specific/solinas32_2e285m9/fesquare.c101
-rw-r--r--src/Specific/solinas32_2e285m9/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e285m9/freeze.c25
-rw-r--r--src/Specific/solinas32_2e285m9/freeze.h6
-rw-r--r--src/Specific/solinas32_2e291m19/femul.c86
-rw-r--r--src/Specific/solinas32_2e291m19/femul.h6
-rw-r--r--src/Specific/solinas32_2e291m19/fesquare.c86
-rw-r--r--src/Specific/solinas32_2e291m19/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e291m19/freeze.c25
-rw-r--r--src/Specific/solinas32_2e291m19/freeze.h6
-rw-r--r--src/Specific/solinas32_2e321m9/femul.c106
-rw-r--r--src/Specific/solinas32_2e321m9/femul.h6
-rw-r--r--src/Specific/solinas32_2e321m9/femulDisplay.log108
-rw-r--r--src/Specific/solinas32_2e321m9/fesquare.c106
-rw-r--r--src/Specific/solinas32_2e321m9/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e321m9/freeze.c25
-rw-r--r--src/Specific/solinas32_2e321m9/freeze.h6
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/femul.c119
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/femul.h6
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/fesquare.c119
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e322m2e161m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e336m17/femul.c96
-rw-r--r--src/Specific/solinas32_2e336m17/femul.h6
-rw-r--r--src/Specific/solinas32_2e336m17/fesquare.c96
-rw-r--r--src/Specific/solinas32_2e336m17/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e336m17/freeze.c25
-rw-r--r--src/Specific/solinas32_2e336m17/freeze.h6
-rw-r--r--src/Specific/solinas32_2e336m3/femul.c86
-rw-r--r--src/Specific/solinas32_2e336m3/femul.h6
-rw-r--r--src/Specific/solinas32_2e336m3/fesquare.c86
-rw-r--r--src/Specific/solinas32_2e336m3/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e336m3/freeze.c25
-rw-r--r--src/Specific/solinas32_2e336m3/freeze.h6
-rw-r--r--src/Specific/solinas32_2e338m15/femul.c91
-rw-r--r--src/Specific/solinas32_2e338m15/femul.h6
-rw-r--r--src/Specific/solinas32_2e338m15/fesquare.c91
-rw-r--r--src/Specific/solinas32_2e338m15/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e338m15/freeze.c25
-rw-r--r--src/Specific/solinas32_2e338m15/freeze.h6
-rw-r--r--src/Specific/solinas32_2e369m25/femul.c106
-rw-r--r--src/Specific/solinas32_2e369m25/femul.h6
-rw-r--r--src/Specific/solinas32_2e369m25/femulDisplay.log136
-rw-r--r--src/Specific/solinas32_2e369m25/fesquare.c106
-rw-r--r--src/Specific/solinas32_2e369m25/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e369m25/fesquareDisplay.log136
-rw-r--r--src/Specific/solinas32_2e369m25/freeze.c25
-rw-r--r--src/Specific/solinas32_2e369m25/freeze.h6
-rw-r--r--src/Specific/solinas32_2e369m25/freezeDisplay.log32
-rw-r--r--src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freezeDisplay.log28
-rw-r--r--src/Specific/solinas32_2e384m5x2e368m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e384m5x2e368m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e384m5x2e368m1/freezeDisplay.log56
-rw-r--r--src/Specific/solinas32_2e384m79x2e376m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e384m79x2e376m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e384m79x2e376m1/freezeDisplay.log56
-rw-r--r--src/Specific/solinas32_2e401m31/femul.c106
-rw-r--r--src/Specific/solinas32_2e401m31/femul.h6
-rw-r--r--src/Specific/solinas32_2e401m31/femulDisplay.log138
-rw-r--r--src/Specific/solinas32_2e401m31/fesquare.c106
-rw-r--r--src/Specific/solinas32_2e401m31/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e401m31/fesquareDisplay.log138
-rw-r--r--src/Specific/solinas32_2e401m31/freeze.c25
-rw-r--r--src/Specific/solinas32_2e401m31/freeze.h6
-rw-r--r--src/Specific/solinas32_2e401m31/freezeDisplay.log32
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/femul.c131
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/femul.h6
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/femulDisplay.log101
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/fesquare.c131
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/fesquareDisplay.log101
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e416m2e208m1/freezeDisplay.log32
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/femul.c131
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/femul.h6
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/femulDisplay.log101
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/fesquare.c131
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/fesquareDisplay.log101
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e448m2e224m1/freezeDisplay.log32
-rw-r--r--src/Specific/solinas32_2e450m2e225m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e450m2e225m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e452m3/femul.c106
-rw-r--r--src/Specific/solinas32_2e452m3/femul.h6
-rw-r--r--src/Specific/solinas32_2e452m3/femulDisplay.log154
-rw-r--r--src/Specific/solinas32_2e452m3/fesquare.c106
-rw-r--r--src/Specific/solinas32_2e452m3/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e452m3/fesquareDisplay.log154
-rw-r--r--src/Specific/solinas32_2e452m3/freeze.c25
-rw-r--r--src/Specific/solinas32_2e452m3/freeze.h6
-rw-r--r--src/Specific/solinas32_2e452m3/freezeDisplay.log112
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/femul.c131
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/femul.h6
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/femulDisplay.log101
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/fesquare.c131
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/fesquare.h6
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/fesquareDisplay.log101
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/freeze.c25
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/freeze.h6
-rw-r--r--src/Specific/solinas32_2e480m2e240m1/freezeDisplay.log32
-rw-r--r--src/Specific/solinas64_2e127m1/femul.c41
-rw-r--r--src/Specific/solinas64_2e127m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e127m1/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e127m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e127m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e127m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e129m25/femul.c41
-rw-r--r--src/Specific/solinas64_2e129m25/femul.h6
-rw-r--r--src/Specific/solinas64_2e129m25/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e129m25/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e129m25/freeze.c25
-rw-r--r--src/Specific/solinas64_2e129m25/freeze.h6
-rw-r--r--src/Specific/solinas64_2e130m5/femul.c41
-rw-r--r--src/Specific/solinas64_2e130m5/femul.h6
-rw-r--r--src/Specific/solinas64_2e130m5/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e130m5/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e130m5/freeze.c25
-rw-r--r--src/Specific/solinas64_2e130m5/freeze.h6
-rw-r--r--src/Specific/solinas64_2e137m13/femul.c46
-rw-r--r--src/Specific/solinas64_2e137m13/femul.h6
-rw-r--r--src/Specific/solinas64_2e137m13/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e137m13/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e137m13/freeze.c25
-rw-r--r--src/Specific/solinas64_2e137m13/freeze.h6
-rw-r--r--src/Specific/solinas64_2e140m27/femul.c46
-rw-r--r--src/Specific/solinas64_2e140m27/femul.h6
-rw-r--r--src/Specific/solinas64_2e140m27/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e140m27/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e140m27/freeze.c25
-rw-r--r--src/Specific/solinas64_2e140m27/freeze.h6
-rw-r--r--src/Specific/solinas64_2e141m9/femul.c41
-rw-r--r--src/Specific/solinas64_2e141m9/femul.h6
-rw-r--r--src/Specific/solinas64_2e141m9/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e141m9/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e141m9/freeze.c25
-rw-r--r--src/Specific/solinas64_2e141m9/freeze.h6
-rw-r--r--src/Specific/solinas64_2e150m3/femul.c41
-rw-r--r--src/Specific/solinas64_2e150m3/femul.h6
-rw-r--r--src/Specific/solinas64_2e150m3/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e150m3/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e150m3/freeze.c25
-rw-r--r--src/Specific/solinas64_2e150m3/freeze.h6
-rw-r--r--src/Specific/solinas64_2e150m5/femul.c41
-rw-r--r--src/Specific/solinas64_2e150m5/femul.h6
-rw-r--r--src/Specific/solinas64_2e150m5/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e150m5/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e150m5/freeze.c25
-rw-r--r--src/Specific/solinas64_2e150m5/freeze.h6
-rw-r--r--src/Specific/solinas64_2e152m17/femul.c46
-rw-r--r--src/Specific/solinas64_2e152m17/femul.h6
-rw-r--r--src/Specific/solinas64_2e152m17/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e152m17/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e152m17/freeze.c25
-rw-r--r--src/Specific/solinas64_2e152m17/freeze.h6
-rw-r--r--src/Specific/solinas64_2e158m15/femul.c46
-rw-r--r--src/Specific/solinas64_2e158m15/femul.h6
-rw-r--r--src/Specific/solinas64_2e158m15/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e158m15/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e158m15/freeze.c25
-rw-r--r--src/Specific/solinas64_2e158m15/freeze.h6
-rw-r--r--src/Specific/solinas64_2e165m25/femul.c41
-rw-r--r--src/Specific/solinas64_2e165m25/femul.h6
-rw-r--r--src/Specific/solinas64_2e165m25/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e165m25/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e165m25/freeze.c25
-rw-r--r--src/Specific/solinas64_2e165m25/freeze.h6
-rw-r--r--src/Specific/solinas64_2e166m5/femul.c41
-rw-r--r--src/Specific/solinas64_2e166m5/femul.h6
-rw-r--r--src/Specific/solinas64_2e166m5/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e166m5/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e166m5/freeze.c25
-rw-r--r--src/Specific/solinas64_2e166m5/freeze.h6
-rw-r--r--src/Specific/solinas64_2e171m19/femul.c41
-rw-r--r--src/Specific/solinas64_2e171m19/femul.h6
-rw-r--r--src/Specific/solinas64_2e171m19/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e171m19/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e171m19/freeze.c25
-rw-r--r--src/Specific/solinas64_2e171m19/freeze.h6
-rw-r--r--src/Specific/solinas64_2e174m17/femul.c41
-rw-r--r--src/Specific/solinas64_2e174m17/femul.h6
-rw-r--r--src/Specific/solinas64_2e174m17/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e174m17/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e174m17/freeze.c25
-rw-r--r--src/Specific/solinas64_2e174m17/freeze.h6
-rw-r--r--src/Specific/solinas64_2e174m3/femul.c41
-rw-r--r--src/Specific/solinas64_2e174m3/femul.h6
-rw-r--r--src/Specific/solinas64_2e174m3/fesquare.c41
-rw-r--r--src/Specific/solinas64_2e174m3/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e174m3/freeze.c25
-rw-r--r--src/Specific/solinas64_2e174m3/freeze.h6
-rw-r--r--src/Specific/solinas64_2e189m25/femul.c46
-rw-r--r--src/Specific/solinas64_2e189m25/femul.h6
-rw-r--r--src/Specific/solinas64_2e189m25/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e189m25/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e189m25/freeze.c25
-rw-r--r--src/Specific/solinas64_2e189m25/freeze.h6
-rw-r--r--src/Specific/solinas64_2e190m11/femul.c46
-rw-r--r--src/Specific/solinas64_2e190m11/femul.h6
-rw-r--r--src/Specific/solinas64_2e190m11/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e190m11/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e190m11/freeze.c25
-rw-r--r--src/Specific/solinas64_2e190m11/freeze.h6
-rw-r--r--src/Specific/solinas64_2e191m19/femul.c51
-rw-r--r--src/Specific/solinas64_2e191m19/femul.h6
-rw-r--r--src/Specific/solinas64_2e191m19/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e191m19/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e191m19/freeze.c25
-rw-r--r--src/Specific/solinas64_2e191m19/freeze.h6
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/femul.c57
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/fesquare.c57
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e192m2e64m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e194m33/femul.c46
-rw-r--r--src/Specific/solinas64_2e194m33/femul.h6
-rw-r--r--src/Specific/solinas64_2e194m33/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e194m33/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e194m33/freeze.c25
-rw-r--r--src/Specific/solinas64_2e194m33/freeze.h6
-rw-r--r--src/Specific/solinas64_2e196m15/femul.c46
-rw-r--r--src/Specific/solinas64_2e196m15/femul.h6
-rw-r--r--src/Specific/solinas64_2e196m15/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e196m15/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e196m15/freeze.c25
-rw-r--r--src/Specific/solinas64_2e196m15/freeze.h6
-rw-r--r--src/Specific/solinas64_2e198m17/femul.c46
-rw-r--r--src/Specific/solinas64_2e198m17/femul.h6
-rw-r--r--src/Specific/solinas64_2e198m17/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e198m17/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e198m17/freeze.c25
-rw-r--r--src/Specific/solinas64_2e198m17/freeze.h6
-rw-r--r--src/Specific/solinas64_2e205m45x2e198m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e205m45x2e198m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e206m5/femul.c46
-rw-r--r--src/Specific/solinas64_2e206m5/femul.h6
-rw-r--r--src/Specific/solinas64_2e206m5/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e206m5/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e206m5/freeze.c25
-rw-r--r--src/Specific/solinas64_2e206m5/freeze.h6
-rw-r--r--src/Specific/solinas64_2e212m29/femul.c46
-rw-r--r--src/Specific/solinas64_2e212m29/femul.h6
-rw-r--r--src/Specific/solinas64_2e212m29/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e212m29/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e212m29/freeze.c25
-rw-r--r--src/Specific/solinas64_2e212m29/freeze.h6
-rw-r--r--src/Specific/solinas64_2e213m3/femul.c46
-rw-r--r--src/Specific/solinas64_2e213m3/femul.h6
-rw-r--r--src/Specific/solinas64_2e213m3/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e213m3/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e213m3/freeze.c25
-rw-r--r--src/Specific/solinas64_2e213m3/freeze.h6
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/femul.c59
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/fesquare.c59
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e216m2e108m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e221m3/femul.c46
-rw-r--r--src/Specific/solinas64_2e221m3/femul.h6
-rw-r--r--src/Specific/solinas64_2e221m3/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e221m3/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e221m3/freeze.c25
-rw-r--r--src/Specific/solinas64_2e221m3/freeze.h6
-rw-r--r--src/Specific/solinas64_2e222m117/femul.c46
-rw-r--r--src/Specific/solinas64_2e222m117/femul.h6
-rw-r--r--src/Specific/solinas64_2e222m117/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e222m117/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e222m117/freeze.c25
-rw-r--r--src/Specific/solinas64_2e222m117/freeze.h6
-rw-r--r--src/Specific/solinas64_2e224m2e96p1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e224m2e96p1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e226m5/femul.c46
-rw-r--r--src/Specific/solinas64_2e226m5/femul.h6
-rw-r--r--src/Specific/solinas64_2e226m5/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e226m5/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e226m5/freeze.c25
-rw-r--r--src/Specific/solinas64_2e226m5/freeze.h6
-rw-r--r--src/Specific/solinas64_2e230m27/femul.c46
-rw-r--r--src/Specific/solinas64_2e230m27/femul.h6
-rw-r--r--src/Specific/solinas64_2e230m27/fesquare.c46
-rw-r--r--src/Specific/solinas64_2e230m27/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e230m27/freeze.c25
-rw-r--r--src/Specific/solinas64_2e230m27/freeze.h6
-rw-r--r--src/Specific/solinas64_2e235m15/femul.c51
-rw-r--r--src/Specific/solinas64_2e235m15/femul.h6
-rw-r--r--src/Specific/solinas64_2e235m15/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e235m15/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e235m15/freeze.c25
-rw-r--r--src/Specific/solinas64_2e235m15/freeze.h6
-rw-r--r--src/Specific/solinas64_2e243m9/femul.c56
-rw-r--r--src/Specific/solinas64_2e243m9/femul.h6
-rw-r--r--src/Specific/solinas64_2e243m9/fesquare.c56
-rw-r--r--src/Specific/solinas64_2e243m9/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e243m9/freeze.c25
-rw-r--r--src/Specific/solinas64_2e243m9/freeze.h6
-rw-r--r--src/Specific/solinas64_2e251m9/femul.c51
-rw-r--r--src/Specific/solinas64_2e251m9/femul.h6
-rw-r--r--src/Specific/solinas64_2e251m9/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e251m9/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e251m9/freeze.c26
-rw-r--r--src/Specific/solinas64_2e251m9/freeze.h6
-rw-r--r--src/Specific/solinas64_2e254m127x2e240m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e254m127x2e240m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e255m19/femul.c51
-rw-r--r--src/Specific/solinas64_2e255m19/femul.h6
-rw-r--r--src/Specific/solinas64_2e255m19/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e255m19/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e255m19/freeze.c42
-rw-r--r--src/Specific/solinas64_2e255m19/freeze.h6
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/femul.c64
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c64
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c42
-rw-r--r--src/Specific/solinas64_2e255m2e4m2e1m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e255m765/femul.c51
-rw-r--r--src/Specific/solinas64_2e255m765/femul.h6
-rw-r--r--src/Specific/solinas64_2e255m765/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e255m765/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e255m765/freeze.c42
-rw-r--r--src/Specific/solinas64_2e255m765/freeze.h6
-rw-r--r--src/Specific/solinas64_2e256m189/femul.c51
-rw-r--r--src/Specific/solinas64_2e256m189/femul.h6
-rw-r--r--src/Specific/solinas64_2e256m189/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e256m189/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e256m189/freeze.c25
-rw-r--r--src/Specific/solinas64_2e256m189/freeze.h6
-rw-r--r--src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/femul.c59
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/femul.h6
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/fesquare.c59
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/freeze.c25
-rw-r--r--src/Specific/solinas64_2e256m2e32m977/freeze.h6
-rw-r--r--src/Specific/solinas64_2e256m88x2e240m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e256m88x2e240m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e266m3/femul.c51
-rw-r--r--src/Specific/solinas64_2e266m3/femul.h6
-rw-r--r--src/Specific/solinas64_2e266m3/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e266m3/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e266m3/freeze.c25
-rw-r--r--src/Specific/solinas64_2e266m3/freeze.h6
-rw-r--r--src/Specific/solinas64_2e285m9/femul.c51
-rw-r--r--src/Specific/solinas64_2e285m9/femul.h6
-rw-r--r--src/Specific/solinas64_2e285m9/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e285m9/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e285m9/freeze.c25
-rw-r--r--src/Specific/solinas64_2e285m9/freeze.h6
-rw-r--r--src/Specific/solinas64_2e291m19/femul.c51
-rw-r--r--src/Specific/solinas64_2e291m19/femul.h6
-rw-r--r--src/Specific/solinas64_2e291m19/fesquare.c51
-rw-r--r--src/Specific/solinas64_2e291m19/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e291m19/freeze.c25
-rw-r--r--src/Specific/solinas64_2e291m19/freeze.h6
-rw-r--r--src/Specific/solinas64_2e321m9/femul.c56
-rw-r--r--src/Specific/solinas64_2e321m9/femul.h6
-rw-r--r--src/Specific/solinas64_2e321m9/fesquare.c56
-rw-r--r--src/Specific/solinas64_2e321m9/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e321m9/freeze.c25
-rw-r--r--src/Specific/solinas64_2e321m9/freeze.h6
-rw-r--r--src/Specific/solinas64_2e322m2e161m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e322m2e161m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e336m17/femul.c56
-rw-r--r--src/Specific/solinas64_2e336m17/femul.h6
-rw-r--r--src/Specific/solinas64_2e336m17/fesquare.c56
-rw-r--r--src/Specific/solinas64_2e336m17/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e336m17/freeze.c25
-rw-r--r--src/Specific/solinas64_2e336m17/freeze.h6
-rw-r--r--src/Specific/solinas64_2e336m3/femul.c56
-rw-r--r--src/Specific/solinas64_2e336m3/femul.h6
-rw-r--r--src/Specific/solinas64_2e336m3/fesquare.c56
-rw-r--r--src/Specific/solinas64_2e336m3/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e336m3/freeze.c25
-rw-r--r--src/Specific/solinas64_2e336m3/freeze.h6
-rw-r--r--src/Specific/solinas64_2e338m15/femul.c56
-rw-r--r--src/Specific/solinas64_2e338m15/femul.h6
-rw-r--r--src/Specific/solinas64_2e338m15/fesquare.c56
-rw-r--r--src/Specific/solinas64_2e338m15/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e338m15/freeze.c25
-rw-r--r--src/Specific/solinas64_2e338m15/freeze.h6
-rw-r--r--src/Specific/solinas64_2e369m25/femul.c66
-rw-r--r--src/Specific/solinas64_2e369m25/femul.h6
-rw-r--r--src/Specific/solinas64_2e369m25/fesquare.c66
-rw-r--r--src/Specific/solinas64_2e369m25/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e369m25/freeze.c25
-rw-r--r--src/Specific/solinas64_2e369m25/freeze.h6
-rw-r--r--src/Specific/solinas64_2e379m19/femul.c61
-rw-r--r--src/Specific/solinas64_2e379m19/femul.h6
-rw-r--r--src/Specific/solinas64_2e379m19/fesquare.c61
-rw-r--r--src/Specific/solinas64_2e379m19/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e379m19/freeze.c25
-rw-r--r--src/Specific/solinas64_2e379m19/freeze.h6
-rw-r--r--src/Specific/solinas64_2e382m105/femul.c76
-rw-r--r--src/Specific/solinas64_2e382m105/femul.h6
-rw-r--r--src/Specific/solinas64_2e382m105/fesquare.c76
-rw-r--r--src/Specific/solinas64_2e382m105/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e382m105/freeze.c25
-rw-r--r--src/Specific/solinas64_2e382m105/freeze.h6
-rw-r--r--src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e384m317/femul.c66
-rw-r--r--src/Specific/solinas64_2e384m317/femul.h6
-rw-r--r--src/Specific/solinas64_2e384m317/fesquare.c66
-rw-r--r--src/Specific/solinas64_2e384m317/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e384m317/freeze.c25
-rw-r--r--src/Specific/solinas64_2e384m317/freeze.h6
-rw-r--r--src/Specific/solinas64_2e384m5x2e368m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e384m5x2e368m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e384m79x2e376m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e384m79x2e376m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e401m31/femul.c66
-rw-r--r--src/Specific/solinas64_2e401m31/femul.h6
-rw-r--r--src/Specific/solinas64_2e401m31/fesquare.c66
-rw-r--r--src/Specific/solinas64_2e401m31/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e401m31/freeze.c26
-rw-r--r--src/Specific/solinas64_2e401m31/freeze.h6
-rw-r--r--src/Specific/solinas64_2e413m21/femul.c61
-rw-r--r--src/Specific/solinas64_2e413m21/femul.h6
-rw-r--r--src/Specific/solinas64_2e413m21/fesquare.c61
-rw-r--r--src/Specific/solinas64_2e413m21/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e413m21/freeze.c25
-rw-r--r--src/Specific/solinas64_2e413m21/freeze.h6
-rw-r--r--src/Specific/solinas64_2e414m17/femul.c71
-rw-r--r--src/Specific/solinas64_2e414m17/femul.h6
-rw-r--r--src/Specific/solinas64_2e414m17/fesquare.c71
-rw-r--r--src/Specific/solinas64_2e414m17/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e414m17/freeze.c25
-rw-r--r--src/Specific/solinas64_2e414m17/freeze.h6
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/femul.c83
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/fesquare.c83
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e416m2e208m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e444m17/femul.c66
-rw-r--r--src/Specific/solinas64_2e444m17/femul.h6
-rw-r--r--src/Specific/solinas64_2e444m17/fesquare.c66
-rw-r--r--src/Specific/solinas64_2e444m17/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e444m17/freeze.c25
-rw-r--r--src/Specific/solinas64_2e444m17/freeze.h6
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/femul.c83
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/fesquare.c83
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e448m2e224m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/femul.c83
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/fesquare.c83
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e450m2e225m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e452m3/femul.c66
-rw-r--r--src/Specific/solinas64_2e452m3/femul.h6
-rw-r--r--src/Specific/solinas64_2e452m3/fesquare.c66
-rw-r--r--src/Specific/solinas64_2e452m3/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e452m3/freeze.c25
-rw-r--r--src/Specific/solinas64_2e452m3/freeze.h6
-rw-r--r--src/Specific/solinas64_2e468m17/femul.c66
-rw-r--r--src/Specific/solinas64_2e468m17/femul.h6
-rw-r--r--src/Specific/solinas64_2e468m17/fesquare.c66
-rw-r--r--src/Specific/solinas64_2e468m17/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e468m17/freeze.c25
-rw-r--r--src/Specific/solinas64_2e468m17/freeze.h6
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/femul.c83
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/fesquare.c83
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e480m2e240m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e488m17/femul.c106
-rw-r--r--src/Specific/solinas64_2e488m17/femul.h6
-rw-r--r--src/Specific/solinas64_2e488m17/femulDisplay.log76
-rw-r--r--src/Specific/solinas64_2e488m17/fesquare.c106
-rw-r--r--src/Specific/solinas64_2e488m17/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e488m17/fesquareDisplay.log76
-rw-r--r--src/Specific/solinas64_2e488m17/freeze.c25
-rw-r--r--src/Specific/solinas64_2e488m17/freeze.h6
-rw-r--r--src/Specific/solinas64_2e488m17/freezeDisplay.log56
-rw-r--r--src/Specific/solinas64_2e489m21/femul.c71
-rw-r--r--src/Specific/solinas64_2e489m21/femul.h6
-rw-r--r--src/Specific/solinas64_2e489m21/fesquare.c71
-rw-r--r--src/Specific/solinas64_2e489m21/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e489m21/freeze.c25
-rw-r--r--src/Specific/solinas64_2e489m21/freeze.h6
-rw-r--r--src/Specific/solinas64_2e495m31/femul.c71
-rw-r--r--src/Specific/solinas64_2e495m31/femul.h6
-rw-r--r--src/Specific/solinas64_2e495m31/fesquare.c71
-rw-r--r--src/Specific/solinas64_2e495m31/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e495m31/freeze.c25
-rw-r--r--src/Specific/solinas64_2e495m31/freeze.h6
-rw-r--r--src/Specific/solinas64_2e510m290x2e496m1/freeze.c62
-rw-r--r--src/Specific/solinas64_2e510m290x2e496m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e511m187/femul.c76
-rw-r--r--src/Specific/solinas64_2e511m187/femul.h6
-rw-r--r--src/Specific/solinas64_2e511m187/fesquare.c76
-rw-r--r--src/Specific/solinas64_2e511m187/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e511m187/freeze.c25
-rw-r--r--src/Specific/solinas64_2e511m187/freeze.h6
-rw-r--r--src/Specific/solinas64_2e511m481/femul.c76
-rw-r--r--src/Specific/solinas64_2e511m481/femul.h6
-rw-r--r--src/Specific/solinas64_2e511m481/fesquare.c76
-rw-r--r--src/Specific/solinas64_2e511m481/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e511m481/freeze.c25
-rw-r--r--src/Specific/solinas64_2e511m481/freeze.h6
-rw-r--r--src/Specific/solinas64_2e512m491x2e496m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e512m491x2e496m1/freeze.h6
-rw-r--r--src/Specific/solinas64_2e512m569/femul.c76
-rw-r--r--src/Specific/solinas64_2e512m569/femul.h6
-rw-r--r--src/Specific/solinas64_2e512m569/fesquare.c76
-rw-r--r--src/Specific/solinas64_2e512m569/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e512m569/freeze.c25
-rw-r--r--src/Specific/solinas64_2e512m569/freeze.h6
-rw-r--r--src/Specific/solinas64_2e521m1/femul.c76
-rw-r--r--src/Specific/solinas64_2e521m1/femul.h6
-rw-r--r--src/Specific/solinas64_2e521m1/fesquare.c76
-rw-r--r--src/Specific/solinas64_2e521m1/fesquare.h6
-rw-r--r--src/Specific/solinas64_2e521m1/freeze.c25
-rw-r--r--src/Specific/solinas64_2e521m1/freeze.h6
1899 files changed, 58657 insertions, 645 deletions
diff --git a/src/Specific/montgomery32_2e127m1/feadd.c b/src/Specific/montgomery32_2e127m1/feadd.c
new file mode 100644
index 000000000..aaa652c86
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Adds the four-word operands (x5, x7, x9, x8) and (x11, x13, x15, x14) through a carry chain, trial-subtracts a fixed constant, and writes four words to out. The uint64_t arguments are fed to 32-bit intrinsics.
+{ uint32_t x17; uint8_t x18 = _addcarryx_u32(0x0, x5, x11, &x17); // carry chain starts here with carry-in 0
+{ uint32_t x20; uint8_t x21 = _addcarryx_u32(x18, x7, x13, &x20);
+{ uint32_t x23; uint8_t x24 = _addcarryx_u32(x21, x9, x15, &x23);
+{ uint32_t x26; uint8_t x27 = _addcarryx_u32(x24, x8, x14, &x26); // x27 is the carry out of the last word
+{ uint32_t x29; uint8_t x30 = _subborrow_u32(0x0, x17, 0xffffffff, &x29); // borrow chain: subtract the words 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff (2^127 - 1 with x17 as the lowest word)
+{ uint32_t x32; uint8_t x33 = _subborrow_u32(x30, x20, 0xffffffff, &x32);
+{ uint32_t x35; uint8_t x36 = _subborrow_u32(x33, x23, 0xffffffff, &x35);
+{ uint32_t x38; uint8_t x39 = _subborrow_u32(x36, x26, 0x7fffffff, &x38);
+{ uint32_t _; uint8_t x42 = _subborrow_u32(x39, x27, 0x0, &_); // run the addition's carry-out through the borrow chain; only the final borrow x42 is kept
+{ uint32_t x43 = cmovznz(x42, x38, x26); // NOTE(review): cmovznz is not defined in this file (presumably liblow.h); assumed to pick between the subtracted and unsubtracted word based on x42 - confirm which
+{ uint32_t x44 = cmovznz(x42, x35, x23);
+{ uint32_t x45 = cmovznz(x42, x32, x20);
+{ uint32_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // out[0] receives the word from the end of the chain (x8/x14), out[3] the word from its start (x5/x11)
+out[1] = x44;
+out[2] = x45;
+out[3] = x46;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery32_2e127m1/feadd.h b/src/Specific/montgomery32_2e127m1/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration only; the matching definition is in feadd.c of this directory, which writes out[0..3].
diff --git a/src/Specific/montgomery32_2e127m1/femul.c b/src/Specific/montgomery32_2e127m1/femul.c
new file mode 100644
index 000000000..603b981dd
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/femul.c
@@ -0,0 +1,136 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies the four-word operands (x5, x7, x9, x8) and (x11, x13, x15, x14), alternating one row of partial products with one reduction step, and writes four words to out. NOTE(review): the layout looks like word-by-word Montgomery multiplication (cf. the directory name) - confirm against the generator.
+{ uint32_t x18; uint32_t x17 = _mulx_u32(x5, x11, &x18); // row 1: x5 times each word of the second operand; _mulx_u32 returns the low half and stores the high half
+{ uint32_t x21; uint32_t x20 = _mulx_u32(x5, x13, &x21);
+{ uint32_t x24; uint32_t x23 = _mulx_u32(x5, x15, &x24);
+{ uint32_t x27; uint32_t x26 = _mulx_u32(x5, x14, &x27);
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x18, x20, &x29); // stitch the row: high half of one product plus low half of the next
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x21, x23, &x32);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x24, x26, &x35);
+{ uint32_t x38; uint8_t _ = _addcarryx_u32(0x0, x36, x27, &x38); // top word of the row: last carry plus the final high half; the carry-out lands in _ and is never read
+{ uint32_t x42; uint32_t x41 = _mulx_u32(x17, 0xffffffff, &x42); // x17 (lowest word so far) times the constant words 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff
+{ uint32_t x45; uint32_t x44 = _mulx_u32(x17, 0xffffffff, &x45);
+{ uint32_t x48; uint32_t x47 = _mulx_u32(x17, 0xffffffff, &x48);
+{ uint32_t x51; uint32_t x50 = _mulx_u32(x17, 0x7fffffff, &x51);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x42, x44, &x53);
+{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x45, x47, &x56);
+{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x48, x50, &x59);
+{ uint32_t x62; uint8_t _ = _addcarryx_u32(0x0, x60, x51, &x62);
+{ uint32_t _; uint8_t x66 = _addcarryx_u32(0x0, x17, x41, &_); // add that multiple to the running value; the lowest sum word is discarded (&_), only its carry x66 is kept
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x29, x53, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x32, x56, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x35, x59, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x38, x62, &x77);
+{ uint32_t x81; uint32_t x80 = _mulx_u32(x7, x11, &x81); // row 2: x7 times each word of the second operand
+{ uint32_t x84; uint32_t x83 = _mulx_u32(x7, x13, &x84);
+{ uint32_t x87; uint32_t x86 = _mulx_u32(x7, x15, &x87);
+{ uint32_t x90; uint32_t x89 = _mulx_u32(x7, x14, &x90);
+{ uint32_t x92; uint8_t x93 = _addcarryx_u32(0x0, x81, x83, &x92);
+{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x84, x86, &x95);
+{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x87, x89, &x98);
+{ uint32_t x101; uint8_t _ = _addcarryx_u32(0x0, x99, x90, &x101);
+{ uint32_t x104; uint8_t x105 = _addcarryx_u32(0x0, x68, x80, &x104); // add row 2 to the words left by the previous step (x68, x71, x74, x77, carry x78)
+{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x71, x92, &x107);
+{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x74, x95, &x110);
+{ uint32_t x113; uint8_t x114 = _addcarryx_u32(x111, x77, x98, &x113);
+{ uint32_t x116; uint8_t x117 = _addcarryx_u32(x114, x78, x101, &x116);
+{ uint32_t x120; uint32_t x119 = _mulx_u32(x104, 0xffffffff, &x120); // x104 times the same constant words
+{ uint32_t x123; uint32_t x122 = _mulx_u32(x104, 0xffffffff, &x123);
+{ uint32_t x126; uint32_t x125 = _mulx_u32(x104, 0xffffffff, &x126);
+{ uint32_t x129; uint32_t x128 = _mulx_u32(x104, 0x7fffffff, &x129);
+{ uint32_t x131; uint8_t x132 = _addcarryx_u32(0x0, x120, x122, &x131);
+{ uint32_t x134; uint8_t x135 = _addcarryx_u32(x132, x123, x125, &x134);
+{ uint32_t x137; uint8_t x138 = _addcarryx_u32(x135, x126, x128, &x137);
+{ uint32_t x140; uint8_t _ = _addcarryx_u32(0x0, x138, x129, &x140);
+{ uint32_t _; uint8_t x144 = _addcarryx_u32(0x0, x104, x119, &_); // lowest sum word discarded again, carry x144 kept
+{ uint32_t x146; uint8_t x147 = _addcarryx_u32(x144, x107, x131, &x146);
+{ uint32_t x149; uint8_t x150 = _addcarryx_u32(x147, x110, x134, &x149);
+{ uint32_t x152; uint8_t x153 = _addcarryx_u32(x150, x113, x137, &x152);
+{ uint32_t x155; uint8_t x156 = _addcarryx_u32(x153, x116, x140, &x155);
+{ uint8_t x157 = (x156 + x117); // combine the carry-outs of the two preceding chains into the extra top word
+{ uint32_t x160; uint32_t x159 = _mulx_u32(x9, x11, &x160); // row 3: x9 times each word of the second operand
+{ uint32_t x163; uint32_t x162 = _mulx_u32(x9, x13, &x163);
+{ uint32_t x166; uint32_t x165 = _mulx_u32(x9, x15, &x166);
+{ uint32_t x169; uint32_t x168 = _mulx_u32(x9, x14, &x169);
+{ uint32_t x171; uint8_t x172 = _addcarryx_u32(0x0, x160, x162, &x171);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x163, x165, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x166, x168, &x177);
+{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x169, &x180);
+{ uint32_t x183; uint8_t x184 = _addcarryx_u32(0x0, x146, x159, &x183); // add row 3 to the words left by the previous step
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x149, x171, &x186);
+{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x152, x174, &x189);
+{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x155, x177, &x192);
+{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x157, x180, &x195);
+{ uint32_t x199; uint32_t x198 = _mulx_u32(x183, 0xffffffff, &x199); // x183 times the same constant words
+{ uint32_t x202; uint32_t x201 = _mulx_u32(x183, 0xffffffff, &x202);
+{ uint32_t x205; uint32_t x204 = _mulx_u32(x183, 0xffffffff, &x205);
+{ uint32_t x208; uint32_t x207 = _mulx_u32(x183, 0x7fffffff, &x208);
+{ uint32_t x210; uint8_t x211 = _addcarryx_u32(0x0, x199, x201, &x210);
+{ uint32_t x213; uint8_t x214 = _addcarryx_u32(x211, x202, x204, &x213);
+{ uint32_t x216; uint8_t x217 = _addcarryx_u32(x214, x205, x207, &x216);
+{ uint32_t x219; uint8_t _ = _addcarryx_u32(0x0, x217, x208, &x219);
+{ uint32_t _; uint8_t x223 = _addcarryx_u32(0x0, x183, x198, &_); // lowest sum word discarded again, carry x223 kept
+{ uint32_t x225; uint8_t x226 = _addcarryx_u32(x223, x186, x210, &x225);
+{ uint32_t x228; uint8_t x229 = _addcarryx_u32(x226, x189, x213, &x228);
+{ uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x192, x216, &x231);
+{ uint32_t x234; uint8_t x235 = _addcarryx_u32(x232, x195, x219, &x234);
+{ uint8_t x236 = (x235 + x196); // combine the carry-outs of the two preceding chains
+{ uint32_t x239; uint32_t x238 = _mulx_u32(x8, x11, &x239); // row 4: x8 times each word of the second operand
+{ uint32_t x242; uint32_t x241 = _mulx_u32(x8, x13, &x242);
+{ uint32_t x245; uint32_t x244 = _mulx_u32(x8, x15, &x245);
+{ uint32_t x248; uint32_t x247 = _mulx_u32(x8, x14, &x248);
+{ uint32_t x250; uint8_t x251 = _addcarryx_u32(0x0, x239, x241, &x250);
+{ uint32_t x253; uint8_t x254 = _addcarryx_u32(x251, x242, x244, &x253);
+{ uint32_t x256; uint8_t x257 = _addcarryx_u32(x254, x245, x247, &x256);
+{ uint32_t x259; uint8_t _ = _addcarryx_u32(0x0, x257, x248, &x259);
+{ uint32_t x262; uint8_t x263 = _addcarryx_u32(0x0, x225, x238, &x262); // add row 4 to the words left by the previous step
+{ uint32_t x265; uint8_t x266 = _addcarryx_u32(x263, x228, x250, &x265);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x231, x253, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x234, x256, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x236, x259, &x274);
+{ uint32_t x278; uint32_t x277 = _mulx_u32(x262, 0xffffffff, &x278); // x262 times the same constant words
+{ uint32_t x281; uint32_t x280 = _mulx_u32(x262, 0xffffffff, &x281);
+{ uint32_t x284; uint32_t x283 = _mulx_u32(x262, 0xffffffff, &x284);
+{ uint32_t x287; uint32_t x286 = _mulx_u32(x262, 0x7fffffff, &x287);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(0x0, x278, x280, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x281, x283, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x284, x286, &x295);
+{ uint32_t x298; uint8_t _ = _addcarryx_u32(0x0, x296, x287, &x298);
+{ uint32_t _; uint8_t x302 = _addcarryx_u32(0x0, x262, x277, &_); // lowest sum word discarded again, carry x302 kept
+{ uint32_t x304; uint8_t x305 = _addcarryx_u32(x302, x265, x289, &x304);
+{ uint32_t x307; uint8_t x308 = _addcarryx_u32(x305, x268, x292, &x307);
+{ uint32_t x310; uint8_t x311 = _addcarryx_u32(x308, x271, x295, &x310);
+{ uint32_t x313; uint8_t x314 = _addcarryx_u32(x311, x274, x298, &x313);
+{ uint8_t x315 = (x314 + x275); // combine the carry-outs of the two preceding chains
+{ uint32_t x317; uint8_t x318 = _subborrow_u32(0x0, x304, 0xffffffff, &x317); // trial subtraction of the constant words (2^127 - 1 with x304 as the lowest word)
+{ uint32_t x320; uint8_t x321 = _subborrow_u32(x318, x307, 0xffffffff, &x320);
+{ uint32_t x323; uint8_t x324 = _subborrow_u32(x321, x310, 0xffffffff, &x323);
+{ uint32_t x326; uint8_t x327 = _subborrow_u32(x324, x313, 0x7fffffff, &x326);
+{ uint32_t _; uint8_t x330 = _subborrow_u32(x327, x315, 0x0, &_); // run the extra top word x315 through the borrow chain; only the final borrow x330 is kept
+{ uint32_t x331 = cmovznz(x330, x326, x313); // NOTE(review): cmovznz is not defined in this file (presumably liblow.h); assumed to pick between the subtracted and unsubtracted word based on x330 - confirm which
+{ uint32_t x332 = cmovznz(x330, x323, x310);
+{ uint32_t x333 = cmovznz(x330, x320, x307);
+{ uint32_t x334 = cmovznz(x330, x317, x304);
+out[0] = x331; // out[0] receives the word from the end of the chain, out[3] the word from its start
+out[1] = x332;
+out[2] = x333;
+out[3] = x334;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery32_2e127m1/femul.h b/src/Specific/montgomery32_2e127m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration only; the matching definition is in femul.c of this directory, which writes out[0..3].
diff --git a/src/Specific/montgomery32_2e127m1/fenz.c b/src/Specific/montgomery32_2e127m1/fenz.c
new file mode 100644
index 000000000..308da273a
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the four input words together and stores the combined value in out[0].
+{ uint32_t x7 = (x6 | x5); // each OR is narrowed to uint32_t on assignment, so bits above bit 31 of the inputs are dropped
+{ uint32_t x8 = (x4 | x7);
+{ uint32_t x9 = (x2 | x8);
+out[0] = x9; // zero exactly when the low 32 bits of x2, x4, x5 and x6 are all zero
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e127m1/fenz.h b/src/Specific/montgomery32_2e127m1/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only; the matching definition is in fenz.c of this directory, which writes out[0] only.
diff --git a/src/Specific/montgomery32_2e127m1/feopp.c b/src/Specific/montgomery32_2e127m1/feopp.c
new file mode 100644
index 000000000..db5acedf9
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Subtracts the four-word operand (x2, x4, x6, x5) from zero through a borrow chain, then adds back masked constant words; writes four words to out. The uint64_t arguments are fed to 32-bit intrinsics.
+{ uint32_t x8; uint8_t x9 = _subborrow_u32(0x0, 0x0, x2, &x8); // borrow chain starts here: 0 - x2 with borrow-in 0
+{ uint32_t x11; uint8_t x12 = _subborrow_u32(x9, 0x0, x4, &x11);
+{ uint32_t x14; uint8_t x15 = _subborrow_u32(x12, 0x0, x6, &x14);
+{ uint32_t x17; uint8_t x18 = _subborrow_u32(x15, 0x0, x5, &x17); // x18 is the borrow out of the last word
+{ uint32_t x19 = (uint32_t)cmovznz(x18, 0x0, 0xffffffff); // NOTE(review): cmovznz is not defined in this file (presumably liblow.h); x19 is presumably an all-zeros or all-ones mask chosen by the borrow x18 - confirm
+{ uint32_t x20 = (x19 & 0xffffffff); // mask applied to the constant words 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff (2^127 - 1 with the x8 word lowest)
+{ uint32_t x22; uint8_t x23 = _addcarryx_u32(0x0, x8, x20, &x22); // carry chain adding the masked constant to the difference
+{ uint32_t x24 = (x19 & 0xffffffff);
+{ uint32_t x26; uint8_t x27 = _addcarryx_u32(x23, x11, x24, &x26);
+{ uint32_t x28 = (x19 & 0xffffffff);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x27, x14, x28, &x30);
+{ uint32_t x32 = (x19 & 0x7fffffff);
+{ uint32_t x34; uint8_t _ = _addcarryx_u32(x31, x17, x32, &x34); // final carry-out lands in _ and is never read
+out[0] = x34; // out[0] receives the word from the end of the chain (x5), out[3] the word from its start (x2)
+out[1] = x30;
+out[2] = x26;
+out[3] = x22;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery32_2e127m1/feopp.h b/src/Specific/montgomery32_2e127m1/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only; the matching definition is in feopp.c of this directory, which writes out[0..3].
diff --git a/src/Specific/montgomery32_2e127m1/fesub.c b/src/Specific/montgomery32_2e127m1/fesub.c
new file mode 100644
index 000000000..573f8a4f7
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtracts the four-word operand (x11, x13, x15, x14) from (x5, x7, x9, x8) through a borrow chain, then adds back masked constant words; writes four words to out. The uint64_t arguments are fed to 32-bit intrinsics.
+{ uint32_t x17; uint8_t x18 = _subborrow_u32(0x0, x5, x11, &x17); // borrow chain starts here: x5 - x11 with borrow-in 0
+{ uint32_t x20; uint8_t x21 = _subborrow_u32(x18, x7, x13, &x20);
+{ uint32_t x23; uint8_t x24 = _subborrow_u32(x21, x9, x15, &x23);
+{ uint32_t x26; uint8_t x27 = _subborrow_u32(x24, x8, x14, &x26); // x27 is the borrow out of the last word
+{ uint32_t x28 = (uint32_t)cmovznz(x27, 0x0, 0xffffffff); // NOTE(review): cmovznz is not defined in this file (presumably liblow.h); x28 is presumably an all-zeros or all-ones mask chosen by the borrow x27 - confirm
+{ uint32_t x29 = (x28 & 0xffffffff); // mask applied to the constant words 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff (2^127 - 1 with the x17 word lowest)
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(0x0, x17, x29, &x31); // carry chain adding the masked constant to the difference
+{ uint32_t x33 = (x28 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x20, x33, &x35);
+{ uint32_t x37 = (x28 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x23, x37, &x39);
+{ uint32_t x41 = (x28 & 0x7fffffff);
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x26, x41, &x43); // final carry-out lands in _ and is never read
+out[0] = x43; // out[0] receives the word from the end of the chain (x8/x14), out[3] the word from its start (x5/x11)
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery32_2e127m1/fesub.h b/src/Specific/montgomery32_2e127m1/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery32_2e127m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration only; the matching definition is in fesub.c of this directory, which writes out[0..3].
diff --git a/src/Specific/montgomery32_2e129m25/feadd.c b/src/Specific/montgomery32_2e129m25/feadd.c
new file mode 100644
index 000000000..51faddc1d
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Adds the five-word operands (x5, x7, x9, x11, x10) and (x13, x15, x17, x19, x18) through a carry chain, trial-subtracts a fixed constant, and writes five words to out. The uint64_t arguments are fed to 32-bit intrinsics.
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // carry chain starts here with carry-in 0
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); // x34 is the carry out of the last word
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffe7, &x36); // borrow chain: subtract the words 0xffffffe7, 0xffffffff, 0xffffffff, 0xffffffff, 0x1 (2^129 - 25 with x21 as the lowest word)
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // run the addition's carry-out through the borrow chain; only the final borrow x52 is kept
+{ uint32_t x53 = cmovznz(x52, x48, x33); // NOTE(review): cmovznz is not defined in this file (presumably liblow.h); assumed to pick between the subtracted and unsubtracted word based on x52 - confirm which
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // out[0] receives the word from the end of the chain (x10/x18), out[4] the word from its start (x5/x13)
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e129m25/feadd.h b/src/Specific/montgomery32_2e129m25/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Declaration only; the matching definition is in feadd.c of this directory, which writes out[0..4].
diff --git a/src/Specific/montgomery32_2e129m25/femul.c b/src/Specific/montgomery32_2e129m25/femul.c
new file mode 100644
index 000000000..b4e56de2d
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/femul.c
@@ -0,0 +1,190 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Multiplies the 5-limb operands (x10,x11,x9,x7,x5) and (x18,x19,x17,x15,x13) (x5/x13 lowest) with a reduction by 2^129 - 25 interleaved after each limb of the first operand (Montgomery-style per the directory name; the representation's scaling is not visible here); writes five limbs to out, highest first.
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22); // round 1: x5 times each limb of the second operand; _mulx_u32 returns the low half and stores the high half
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36); // stitch partial products: high half of one plus low half of the next, carries chained
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48); // top word = last carry + last high half; its carry-out is discarded
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xc28f5c29, &_); // x51 = low 32 bits of x21 * 0xc28f5c29; since 0xc28f5c29 * 25 == 1 (mod 2^32), x51 * 0xffffffe7 == -x21 (mod 2^32)
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffe7, &x55); // x51 times the constant's limbs 0xffffffe7, 0xffffffff, 0xffffffff, 0xffffffff; the top limb 0x1 is covered by adding x51 itself below
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+{ uint32_t x66; uint8_t x67 = _addcarryx_u32(0x0, x55, x57, &x66);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x58, x60, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x61, x63, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x64, x51, &x75); // the x51 operand stands for x51 * 0x1 (top limb of the constant)
+{ uint32_t _; uint8_t x79 = _addcarryx_u32(0x0, x21, x54, &_); // x21 + x54 is 0 modulo 2^32 by the choice of x51, so only the carry x79 is kept
+{ uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x36, x66, &x81);
+{ uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x39, x69, &x84);
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x42, x72, &x87);
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x45, x75, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x48, x76, &x93);
+{ uint32_t x97; uint32_t x96 = _mulx_u32(x7, x13, &x97); // round 2: same pattern with x7
+{ uint32_t x100; uint32_t x99 = _mulx_u32(x7, x15, &x100);
+{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x17, &x103);
+{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x19, &x106);
+{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x18, &x109);
+{ uint32_t x111; uint8_t x112 = _addcarryx_u32(0x0, x97, x99, &x111);
+{ uint32_t x114; uint8_t x115 = _addcarryx_u32(x112, x100, x102, &x114);
+{ uint32_t x117; uint8_t x118 = _addcarryx_u32(x115, x103, x105, &x117);
+{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+{ uint32_t x123; uint8_t _ = _addcarryx_u32(0x0, x121, x109, &x123);
+{ uint32_t x126; uint8_t x127 = _addcarryx_u32(0x0, x81, x96, &x126); // add this round's product to the running value x81, x84, x87, x90, x93, x94 left by round 1
+{ uint32_t x129; uint8_t x130 = _addcarryx_u32(x127, x84, x111, &x129);
+{ uint32_t x132; uint8_t x133 = _addcarryx_u32(x130, x87, x114, &x132);
+{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x94, x123, &x141);
+{ uint32_t _; uint32_t x144 = _mulx_u32(x126, 0xc28f5c29, &_);
+{ uint32_t x148; uint32_t x147 = _mulx_u32(x144, 0xffffffe7, &x148);
+{ uint32_t x151; uint32_t x150 = _mulx_u32(x144, 0xffffffff, &x151);
+{ uint32_t x154; uint32_t x153 = _mulx_u32(x144, 0xffffffff, &x154);
+{ uint32_t x157; uint32_t x156 = _mulx_u32(x144, 0xffffffff, &x157);
+{ uint32_t x159; uint8_t x160 = _addcarryx_u32(0x0, x148, x150, &x159);
+{ uint32_t x162; uint8_t x163 = _addcarryx_u32(x160, x151, x153, &x162);
+{ uint32_t x165; uint8_t x166 = _addcarryx_u32(x163, x154, x156, &x165);
+{ uint32_t x168; uint8_t x169 = _addcarryx_u32(x166, x157, x144, &x168);
+{ uint32_t _; uint8_t x172 = _addcarryx_u32(0x0, x126, x147, &_);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x129, x159, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x132, x162, &x177);
+{ uint32_t x180; uint8_t x181 = _addcarryx_u32(x178, x135, x165, &x180);
+{ uint32_t x183; uint8_t x184 = _addcarryx_u32(x181, x138, x168, &x183);
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x141, x169, &x186);
+{ uint8_t x188 = (x187 + x142); // merge the two carry-outs into the extra top word for the next round
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x9, x13, &x191); // round 3: same pattern with x9
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x9, x15, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x9, x17, &x197);
+{ uint32_t x200; uint32_t x199 = _mulx_u32(x9, x19, &x200);
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x18, &x203);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(0x0, x191, x193, &x205);
+{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x194, x196, &x208);
+{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x197, x199, &x211);
+{ uint32_t x214; uint8_t x215 = _addcarryx_u32(x212, x200, x202, &x214);
+{ uint32_t x217; uint8_t _ = _addcarryx_u32(0x0, x215, x203, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(0x0, x174, x190, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x177, x205, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x180, x208, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x183, x211, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x186, x214, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x188, x217, &x235);
+{ uint32_t _; uint32_t x238 = _mulx_u32(x220, 0xc28f5c29, &_);
+{ uint32_t x242; uint32_t x241 = _mulx_u32(x238, 0xffffffe7, &x242);
+{ uint32_t x245; uint32_t x244 = _mulx_u32(x238, 0xffffffff, &x245);
+{ uint32_t x248; uint32_t x247 = _mulx_u32(x238, 0xffffffff, &x248);
+{ uint32_t x251; uint32_t x250 = _mulx_u32(x238, 0xffffffff, &x251);
+{ uint32_t x253; uint8_t x254 = _addcarryx_u32(0x0, x242, x244, &x253);
+{ uint32_t x256; uint8_t x257 = _addcarryx_u32(x254, x245, x247, &x256);
+{ uint32_t x259; uint8_t x260 = _addcarryx_u32(x257, x248, x250, &x259);
+{ uint32_t x262; uint8_t x263 = _addcarryx_u32(x260, x251, x238, &x262);
+{ uint32_t _; uint8_t x266 = _addcarryx_u32(0x0, x220, x241, &_);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x223, x253, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x226, x256, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x229, x259, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x232, x262, &x277);
+{ uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x235, x263, &x280);
+{ uint8_t x282 = (x281 + x236);
+{ uint32_t x285; uint32_t x284 = _mulx_u32(x11, x13, &x285); // round 4: same pattern with x11
+{ uint32_t x288; uint32_t x287 = _mulx_u32(x11, x15, &x288);
+{ uint32_t x291; uint32_t x290 = _mulx_u32(x11, x17, &x291);
+{ uint32_t x294; uint32_t x293 = _mulx_u32(x11, x19, &x294);
+{ uint32_t x297; uint32_t x296 = _mulx_u32(x11, x18, &x297);
+{ uint32_t x299; uint8_t x300 = _addcarryx_u32(0x0, x285, x287, &x299);
+{ uint32_t x302; uint8_t x303 = _addcarryx_u32(x300, x288, x290, &x302);
+{ uint32_t x305; uint8_t x306 = _addcarryx_u32(x303, x291, x293, &x305);
+{ uint32_t x308; uint8_t x309 = _addcarryx_u32(x306, x294, x296, &x308);
+{ uint32_t x311; uint8_t _ = _addcarryx_u32(0x0, x309, x297, &x311);
+{ uint32_t x314; uint8_t x315 = _addcarryx_u32(0x0, x268, x284, &x314);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(x315, x271, x299, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x274, x302, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x277, x305, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x280, x308, &x326);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x282, x311, &x329);
+{ uint32_t _; uint32_t x332 = _mulx_u32(x314, 0xc28f5c29, &_);
+{ uint32_t x336; uint32_t x335 = _mulx_u32(x332, 0xffffffe7, &x336);
+{ uint32_t x339; uint32_t x338 = _mulx_u32(x332, 0xffffffff, &x339);
+{ uint32_t x342; uint32_t x341 = _mulx_u32(x332, 0xffffffff, &x342);
+{ uint32_t x345; uint32_t x344 = _mulx_u32(x332, 0xffffffff, &x345);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(0x0, x336, x338, &x347);
+{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x339, x341, &x350);
+{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x342, x344, &x353);
+{ uint32_t x356; uint8_t x357 = _addcarryx_u32(x354, x345, x332, &x356);
+{ uint32_t _; uint8_t x360 = _addcarryx_u32(0x0, x314, x335, &_);
+{ uint32_t x362; uint8_t x363 = _addcarryx_u32(x360, x317, x347, &x362);
+{ uint32_t x365; uint8_t x366 = _addcarryx_u32(x363, x320, x350, &x365);
+{ uint32_t x368; uint8_t x369 = _addcarryx_u32(x366, x323, x353, &x368);
+{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x326, x356, &x371);
+{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x329, x357, &x374);
+{ uint8_t x376 = (x375 + x330);
+{ uint32_t x379; uint32_t x378 = _mulx_u32(x10, x13, &x379); // round 5: same pattern with x10
+{ uint32_t x382; uint32_t x381 = _mulx_u32(x10, x15, &x382);
+{ uint32_t x385; uint32_t x384 = _mulx_u32(x10, x17, &x385);
+{ uint32_t x388; uint32_t x387 = _mulx_u32(x10, x19, &x388);
+{ uint32_t x391; uint32_t x390 = _mulx_u32(x10, x18, &x391);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x379, x381, &x393);
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x382, x384, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x385, x387, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x388, x390, &x402);
+{ uint32_t x405; uint8_t _ = _addcarryx_u32(0x0, x403, x391, &x405);
+{ uint32_t x408; uint8_t x409 = _addcarryx_u32(0x0, x362, x378, &x408);
+{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x365, x393, &x411);
+{ uint32_t x414; uint8_t x415 = _addcarryx_u32(x412, x368, x396, &x414);
+{ uint32_t x417; uint8_t x418 = _addcarryx_u32(x415, x371, x399, &x417);
+{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x374, x402, &x420);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x376, x405, &x423);
+{ uint32_t _; uint32_t x426 = _mulx_u32(x408, 0xc28f5c29, &_);
+{ uint32_t x430; uint32_t x429 = _mulx_u32(x426, 0xffffffe7, &x430);
+{ uint32_t x433; uint32_t x432 = _mulx_u32(x426, 0xffffffff, &x433);
+{ uint32_t x436; uint32_t x435 = _mulx_u32(x426, 0xffffffff, &x436);
+{ uint32_t x439; uint32_t x438 = _mulx_u32(x426, 0xffffffff, &x439);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(0x0, x430, x432, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x433, x435, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x436, x438, &x447);
+{ uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x439, x426, &x450);
+{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x408, x429, &_);
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x411, x441, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x414, x444, &x459);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x417, x447, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x420, x450, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x423, x451, &x468);
+{ uint8_t x470 = (x469 + x424);
+{ uint32_t x472; uint8_t x473 = _subborrow_u32(0x0, x456, 0xffffffe7, &x472); // final step: subtract 0x1_ffffffff_ffffffff_ffffffff_ffffffe7 (= 2^129 - 25) from x456..x468, lowest limb first
+{ uint32_t x475; uint8_t x476 = _subborrow_u32(x473, x459, 0xffffffff, &x475);
+{ uint32_t x478; uint8_t x479 = _subborrow_u32(x476, x462, 0xffffffff, &x478);
+{ uint32_t x481; uint8_t x482 = _subborrow_u32(x479, x465, 0xffffffff, &x481);
+{ uint32_t x484; uint8_t x485 = _subborrow_u32(x482, x468, 0x1, &x484);
+{ uint32_t _; uint8_t x488 = _subborrow_u32(x485, x470, 0x0, &_); // folds the extra top word x470 into the borrow chain; only the final borrow x488 is kept
+{ uint32_t x489 = cmovznz(x488, x484, x468); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x488 == 0 and the 3rd otherwise, i.e. keeps the subtracted value when no borrow occurred - confirm
+{ uint32_t x490 = cmovznz(x488, x481, x465);
+{ uint32_t x491 = cmovznz(x488, x478, x462);
+{ uint32_t x492 = cmovznz(x488, x475, x459);
+{ uint32_t x493 = cmovznz(x488, x472, x456);
+out[0] = x489; // highest limb
+out[1] = x490;
+out[2] = x491;
+out[3] = x492;
+out[4] = x493; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e129m25/femul.h b/src/Specific/montgomery32_2e129m25/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e129m25/fenz.c b/src/Specific/montgomery32_2e129m25/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the five inputs together and stores the low 32 bits of that OR in out[0]; the stored word is nonzero exactly when some input has a bit set among its low 32 bits.
+{ uint32_t x9 = (x8 | x7); // each assignment to a uint32_t local drops the upper 32 bits of the 64-bit OR
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11);
+out[0] = x12; // single output word, widened back to uint64_t
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e129m25/fenz.h b/src/Specific/montgomery32_2e129m25/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e129m25/feopp.c b/src/Specific/montgomery32_2e129m25/feopp.c
new file mode 100644
index 000000000..4d8e13b5f
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 minus the 5-limb input (x7,x8,x6,x4,x2) (x2 lowest), then adds the constant 2^129 - 25 ANDed with the mask x24; writes five limbs to out, highest first.
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // 0 - x2 with no incoming borrow; the borrow then chains through x4, x6, x8, x7
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably x24 is 0 when the final borrow x23 is 0 and 0xffffffff otherwise - confirm
+{ uint32_t x25 = (x24 & 0xffffffe7); // masked lowest limb of the constant 0x1_ffffffff_ffffffff_ffffffff_ffffffe7 (= 2^129 - 25)
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint8_t x41 = ((uint8_t)x24 & 0x1); // masked highest limb (0x1) of the constant
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // the carry out of the highest limb is discarded
+out[0] = x43; // highest limb
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // lowest limb
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e129m25/feopp.h b/src/Specific/montgomery32_2e129m25/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e129m25/fesub.c b/src/Specific/montgomery32_2e129m25/fesub.c
new file mode 100644
index 000000000..5bc68d56e
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtracts (x18,x19,x17,x15,x13) from (x10,x11,x9,x7,x5) as five 32-bit limbs each (x5/x13 lowest), then adds the constant 2^129 - 25 ANDed with the mask x35; writes five limbs to out, highest first.
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // lowest limbs, no incoming borrow; the uint64_t arguments are narrowed to 32 bits by the intrinsic's parameter types
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); // highest limbs; x34 is the borrow out of the whole subtraction
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably x35 is 0 when the final borrow x34 is 0 and 0xffffffff otherwise - confirm
+{ uint32_t x36 = (x35 & 0xffffffe7); // masked lowest limb of the constant 0x1_ffffffff_ffffffff_ffffffff_ffffffe7 (= 2^129 - 25)
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint8_t x52 = ((uint8_t)x35 & 0x1); // masked highest limb (0x1) of the constant
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // the carry out of the highest limb is discarded
+out[0] = x54; // highest limb
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38; // lowest limb
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e129m25/fesub.h b/src/Specific/montgomery32_2e129m25/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e129m25/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e130m5/feadd.c b/src/Specific/montgomery32_2e130m5/feadd.c
new file mode 100644
index 000000000..d9c9d32b0
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Adds (x10,x11,x9,x7,x5) and (x18,x19,x17,x15,x13) as five 32-bit limbs each (x5/x13 lowest), then picks between the sum and the sum minus the constant below; writes five limbs to out, highest first.
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // lowest limbs, carry-in 0; the uint64_t arguments are narrowed to 32 bits by the intrinsic's parameter types
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); // highest limbs; x34 is the carry out of the whole addition
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffb, &x36); // start subtracting the constant 0x3_ffffffff_ffffffff_ffffffff_fffffffb (= 2^130 - 5, matching the directory name), lowest limb first
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // folds the addition's carry x34 into the borrow chain; only the final borrow x52 is kept
+{ uint32_t x53 = cmovznz(x52, x48, x33); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x52 == 0 and the 3rd otherwise, i.e. keeps the subtracted value when no borrow occurred - confirm
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // highest limb (derived from x10 + x18)
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57; // lowest limb (derived from x5 + x13)
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e130m5/feadd.h b/src/Specific/montgomery32_2e130m5/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e130m5/femul.c b/src/Specific/montgomery32_2e130m5/femul.c
new file mode 100644
index 000000000..786c579e2
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/femul.c
@@ -0,0 +1,39 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // NOTE(review): this body is not valid C - the generated text breaks off after the first four reduction multiplies (see the out[...] lines below); regenerate it from the generator rather than patching by hand.
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22); // x5 times each limb of the second operand; _mulx_u32 returns the low half and stores the high half
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36); // stitch partial products: high half of one plus low half of the next, carries chained
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xcccccccd, &_); // x51 = low 32 bits of x21 * 0xcccccccd; since 0xcccccccd * 5 == 1 (mod 2^32), x51 * 0xfffffffb == -x21 (mod 2^32)
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffb, &x55);
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+out[0] = uint32_t x66; // NOTE(review): invalid C - a declaration cannot be the right-hand side of an assignment
+out[1] = uint8_t x67 = Op Syntax.MulSplit 32 Syntax.TWord 5 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 3 x51; // NOTE(review): un-rendered generator term, not C; presumably the x51 * 0x3 multiply for the top limb of 2^130 - 5 - confirm against the generator
+out[2] = 0x3;; // NOTE(review): stray second semicolon; x21..x64 computed above are never used by these stores
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[3]; (NOTE(review): the montgomery32_2e129m25 femul.c earlier in this diff says out[5]; this count presumably stems from the broken body above)
diff --git a/src/Specific/montgomery32_2e130m5/femul.h b/src/Specific/montgomery32_2e130m5/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e130m5/fenz.c b/src/Specific/montgomery32_2e130m5/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the five inputs together and stores the low 32 bits of that OR in out[0]; the stored word is nonzero exactly when some input has a bit set among its low 32 bits.
+{ uint32_t x9 = (x8 | x7); // each assignment to a uint32_t local drops the upper 32 bits of the 64-bit OR
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11);
+out[0] = x12; // single output word, widened back to uint64_t
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e130m5/fenz.h b/src/Specific/montgomery32_2e130m5/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e130m5/feopp.c b/src/Specific/montgomery32_2e130m5/feopp.c
new file mode 100644
index 000000000..3b8d9927a
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 minus the 5-limb input (x7,x8,x6,x4,x2) (x2 lowest), then adds the constant 2^130 - 5 ANDed with the mask x24; writes five limbs to out, highest first.
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // 0 - x2 with no incoming borrow; the borrow then chains through x4, x6, x8, x7
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably x24 is 0 when the final borrow x23 is 0 and 0xffffffff otherwise - confirm
+{ uint32_t x25 = (x24 & 0xfffffffb); // masked lowest limb of the constant 0x3_ffffffff_ffffffff_ffffffff_fffffffb (= 2^130 - 5)
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint8_t x41 = ((uint8_t)x24 & 0x3); // masked highest limb (0x3) of the constant
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // the carry out of the highest limb is discarded
+out[0] = x43; // highest limb
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // lowest limb
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e130m5/feopp.h b/src/Specific/montgomery32_2e130m5/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e130m5/fesub.c b/src/Specific/montgomery32_2e130m5/fesub.c
new file mode 100644
index 000000000..7abdaafc0
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtracts (x18,x19,x17,x15,x13) from (x10,x11,x9,x7,x5) as five 32-bit limbs each (x5/x13 lowest), then adds the constant 2^130 - 5 ANDed with the mask x35; writes five limbs to out, highest first.
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // lowest limbs, no incoming borrow; the uint64_t arguments are narrowed to 32 bits by the intrinsic's parameter types
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); // highest limbs; x34 is the borrow out of the whole subtraction
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably x35 is 0 when the final borrow x34 is 0 and 0xffffffff otherwise - confirm
+{ uint32_t x36 = (x35 & 0xfffffffb); // masked lowest limb of the constant 0x3_ffffffff_ffffffff_ffffffff_fffffffb (= 2^130 - 5)
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint8_t x52 = ((uint8_t)x35 & 0x3); // masked highest limb (0x3) of the constant
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // the carry out of the highest limb is discarded
+out[0] = x54; // highest limb
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38; // lowest limb
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e130m5/fesub.h b/src/Specific/montgomery32_2e130m5/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e130m5/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e137m13/feadd.c b/src/Specific/montgomery32_2e137m13/feadd.c
new file mode 100644
index 000000000..ad954bac8
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Adds (x10,x11,x9,x7,x5) and (x18,x19,x17,x15,x13) as five 32-bit limbs each (x5/x13 lowest), then picks between the sum and the sum minus the constant below; writes five limbs to out, highest first.
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // lowest limbs, carry-in 0; the uint64_t arguments are narrowed to 32 bits by the intrinsic's parameter types
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); // highest limbs; x34 is the carry out of the whole addition
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff3, &x36); // start subtracting the constant 0x1ff_ffffffff_ffffffff_ffffffff_fffffff3 (= 2^137 - 13, matching the directory name), lowest limb first
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1ff, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // folds the addition's carry x34 into the borrow chain; only the final borrow x52 is kept
+{ uint32_t x53 = cmovznz(x52, x48, x33); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x52 == 0 and the 3rd otherwise, i.e. keeps the subtracted value when no borrow occurred - confirm
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // highest limb (derived from x10 + x18)
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57; // lowest limb (derived from x5 + x13)
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e137m13/feadd.h b/src/Specific/montgomery32_2e137m13/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype only: one output pointer plus ten uint64_t limb arguments; force_inline expands to __attribute__((always_inline)). Presumably defined in the sibling feadd.c -- confirm.
diff --git a/src/Specific/montgomery32_2e137m13/femul.c b/src/Specific/montgomery32_2e137m13/femul.c
new file mode 100644
index 000000000..5cbadc344
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Word-by-word multiply-and-reduce of two 5-limb values (32-bit limbs) against the constant 2^137 - 13; one round per limb x5, x7, x9, x11, x10 of the first operand, then a final conditional subtraction. Writes 5 limbs to out.
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22); // round 1: partial products of x5 with x13, x15, x17, x19, x18 (return value = low word, pointer target = high word)
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36); // fold each high word into the next low word to form the row x21, x36, x39, x42, x45, x48
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xc4ec4ec5, &_); // x51 = low 32 bits of x21 * 0xc4ec4ec5; note 13 * 0xc4ec4ec5 == 1 (mod 2^32)
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff3, &x55); // x51 times the constant limbs 0xfffffff3, 0xffffffff (x3), 0x1ff (= 2^137 - 13, least-significant first)
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x1ff, &x67);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_); // the low word of x21 + x54 is 0 mod 2^32 by the choice of x51, so only the carry x85 is kept
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87); // remaining limbs of (product row + x51 * constant): x87, x90, x93, x96, x99 with carry x100
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103); // round 2: same pattern for x7, accumulated onto the previous round's result
+{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132); // add this round's product row to the running value
+{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xc4ec4ec5, &_);
+{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff3, &x154);
+{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x1ff, &x166);
+{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148); // combine the two top carries of this round into the extra limb fed to the next round
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203); // round 3: limb x9
+{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xc4ec4ec5, &_);
+{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff3, &x254);
+{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x1ff, &x266);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303); // round 4: limb x11
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xc4ec4ec5, &_);
+{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff3, &x354);
+{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x1ff, &x366);
+{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403); // round 5: limb x10 (last limb of the first operand)
+{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xc4ec4ec5, &_);
+{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff3, &x454);
+{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x1ff, &x466);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff3, &x502); // final trial subtraction of 2^137 - 13 from x486, x489, x492, x495, x498 (plus top limb x500)
+{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x1ff, &x514);
+{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_); // x518 = borrow of the whole subtraction; the difference word is discarded
+{ uint32_t x519 = cmovznz(x518, x514, x498); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x518 == 0 and the 3rd otherwise -- confirm
+{ uint32_t x520 = cmovznz(x518, x511, x495);
+{ uint32_t x521 = cmovznz(x518, x508, x492);
+{ uint32_t x522 = cmovznz(x518, x505, x489);
+{ uint32_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519; // out[0] receives the limb produced last in the chains, out[4] the one produced first
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e137m13/femul.h b/src/Specific/montgomery32_2e137m13/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype only: one output pointer plus ten uint64_t limb arguments; force_inline expands to __attribute__((always_inline)).
diff --git a/src/Specific/montgomery32_2e137m13/fenz.c b/src/Specific/montgomery32_2e137m13/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the five limb arguments together and stores the result in out[0]; the result is nonzero iff some argument has a nonzero bit in its low 32 bits.
+{ uint32_t x9 = (x8 | x7); // each intermediate is a uint32_t, so the upper 32 bits of the uint64_t inputs are dropped
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11);
+out[0] = x12; // single-word result: the OR itself, not a normalized 0/1 flag
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e137m13/fenz.h b/src/Specific/montgomery32_2e137m13/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: one output pointer plus five uint64_t limb arguments; force_inline expands to __attribute__((always_inline)).
diff --git a/src/Specific/montgomery32_2e137m13/feopp.c b/src/Specific/montgomery32_2e137m13/feopp.c
new file mode 100644
index 000000000..9c760b607
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negates a 5-limb value (32-bit limbs x2, x4, x6, x8, x7 in borrow-chain order): computes 0 - x, then adds 2^137 - 13 back under a mask derived from the final borrow; writes 5 limbs to out.
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // limb-wise 0 - x with borrow, starting at x2 (only the low 32 bits of each uint64_t are used)
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22); // x23 = final borrow of the subtraction
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // mask; cmovznz comes from liblow.h (not shown), presumably 0 when x23 == 0 and 0xffffffff otherwise -- confirm
+{ uint32_t x25 = (x24 & 0xfffffff3); // masked constant limbs 0xfffffff3, 0xffffffff (x3), 0x1ff (= 2^137 - 13, least-significant first) are added back with carry
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint32_t x41 = (x24 & 0x1ff);
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // carry out of the last limb is discarded
+out[0] = x43; // out[0] receives the limb produced last in the chain, out[4] the one produced first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e137m13/feopp.h b/src/Specific/montgomery32_2e137m13/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: one output pointer plus five uint64_t limb arguments; force_inline expands to __attribute__((always_inline)).
diff --git a/src/Specific/montgomery32_2e137m13/fesub.c b/src/Specific/montgomery32_2e137m13/fesub.c
new file mode 100644
index 000000000..f59e7ee23
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtracts two 5-limb values (32-bit limbs; minuend x5,x7,x9,x11,x10 and subtrahend x13,x15,x17,x19,x18 in borrow-chain order), then adds 2^137 - 13 back under a mask derived from the final borrow; writes 5 limbs to out.
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // limb-wise subtract with borrow, starting at x5 - x13 (only the low 32 bits of each uint64_t are used)
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); // x34 = final borrow of the subtraction
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // mask; cmovznz comes from liblow.h (not shown), presumably 0 when x34 == 0 and 0xffffffff otherwise -- confirm
+{ uint32_t x36 = (x35 & 0xfffffff3); // masked constant limbs 0xfffffff3, 0xffffffff (x3), 0x1ff (= 2^137 - 13, least-significant first) are added back with carry
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint32_t x52 = (x35 & 0x1ff);
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // carry out of the last limb is discarded
+out[0] = x54; // out[0] receives the limb produced last in the chain, out[4] the one produced first
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e137m13/fesub.h b/src/Specific/montgomery32_2e137m13/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e137m13/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype only: one output pointer plus ten uint64_t limb arguments; force_inline expands to __attribute__((always_inline)).
diff --git a/src/Specific/montgomery32_2e140m27/feadd.c b/src/Specific/montgomery32_2e140m27/feadd.c
new file mode 100644
index 000000000..cd37bdb82
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Adds two 5-limb values (32-bit limbs; operands x5,x7,x9,x11,x10 and x13,x15,x17,x19,x18 in carry-chain order), then conditionally subtracts 2^140 - 27; writes 5 limbs to out.
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // limb-wise add with carry; the chain starts at x5 + x13 with carry-in 0 (the 32-bit intrinsic uses only the low 32 bits of each uint64_t)
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); // x34 = carry out of the last limb
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffe5, &x36); // trial subtraction of the constant limbs 0xffffffe5, 0xffffffff (x3), 0xfff, i.e. 2^140 - 27 read least-significant limb first
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0xfff, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // x52 = borrow of the whole subtraction, with the carry x34 acting as a sixth limb; the difference word itself is discarded
+{ uint32_t x53 = cmovznz(x52, x48, x33); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x52 == 0 and the 3rd otherwise -- confirm
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // out[0] receives the limb produced last in the chains, out[4] the one produced first
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e140m27/feadd.h b/src/Specific/montgomery32_2e140m27/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype only: one output pointer plus ten uint64_t limb arguments; force_inline expands to __attribute__((always_inline)).
diff --git a/src/Specific/montgomery32_2e140m27/femul.c b/src/Specific/montgomery32_2e140m27/femul.c
new file mode 100644
index 000000000..383adf779
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Word-by-word multiply-and-reduce of two 5-limb values (32-bit limbs) against the constant 2^140 - 27; one round per limb x5, x7, x9, x11, x10 of the first operand, then a final conditional subtraction. Writes 5 limbs to out.
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22); // round 1: partial products of x5 with x13, x15, x17, x19, x18 (return value = low word, pointer target = high word)
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36); // fold each high word into the next low word to form the row x21, x36, x39, x42, x45, x48
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0x684bda13, &_); // x51 = low 32 bits of x21 * 0x684bda13; note 27 * 0x684bda13 == 1 (mod 2^32)
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffe5, &x55); // x51 times the constant limbs 0xffffffe5, 0xffffffff (x3), 0xfff (= 2^140 - 27, least-significant first)
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0xfff, &x67);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_); // the low word of x21 + x54 is 0 mod 2^32 by the choice of x51, so only the carry x85 is kept
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87); // remaining limbs of (product row + x51 * constant): x87, x90, x93, x96, x99 with carry x100
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103); // round 2: same pattern for x7, accumulated onto the previous round's result
+{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132); // add this round's product row to the running value
+{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0x684bda13, &_);
+{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xffffffe5, &x154);
+{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0xfff, &x166);
+{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148); // combine the two top carries of this round into the extra limb fed to the next round
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203); // round 3: limb x9
+{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0x684bda13, &_);
+{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xffffffe5, &x254);
+{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0xfff, &x266);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303); // round 4: limb x11
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0x684bda13, &_);
+{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xffffffe5, &x354);
+{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0xfff, &x366);
+{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403); // round 5: limb x10 (last limb of the first operand)
+{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0x684bda13, &_);
+{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xffffffe5, &x454);
+{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0xfff, &x466);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xffffffe5, &x502); // final trial subtraction of 2^140 - 27 from x486, x489, x492, x495, x498 (plus top limb x500)
+{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0xfff, &x514);
+{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_); // x518 = borrow of the whole subtraction; the difference word is discarded
+{ uint32_t x519 = cmovznz(x518, x514, x498); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x518 == 0 and the 3rd otherwise -- confirm
+{ uint32_t x520 = cmovznz(x518, x511, x495);
+{ uint32_t x521 = cmovznz(x518, x508, x492);
+{ uint32_t x522 = cmovznz(x518, x505, x489);
+{ uint32_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519; // out[0] receives the limb produced last in the chains, out[4] the one produced first
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e140m27/femul.h b/src/Specific/montgomery32_2e140m27/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e140m27/fenz.c b/src/Specific/montgomery32_2e140m27/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the five inputs together and stores the result in out[0].
+{ uint32_t x9 = (x8 | x7); // assigning to uint32_t keeps only the low 32 bits of each OR
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11);
+out[0] = x12; // nonzero iff some input has a bit set in its low 32 bits
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e140m27/fenz.h b/src/Specific/montgomery32_2e140m27/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e140m27/feopp.c b/src/Specific/montgomery32_2e140m27/feopp.c
new file mode 100644
index 000000000..0044def60
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Subtracts the limbs (x2, x4, x6, x8, x7) from zero with a borrow chain, then adds back masked constant limbs; writes five words to out.
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // borrow chain starts at x2 and ends at x7
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // mask derived from the final borrow x23; cmovznz is defined outside this file (presumably liblow.h) -- presumably 0 when x23 == 0, else 0xffffffff; confirm
+{ uint32_t x25 = (x24 & 0xffffffe5); // the constants 0xffffffe5, 0xffffffff (x3), 0xfff, read as 32-bit words least-significant first, equal 2^140 - 27
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint32_t x41 = (x24 & 0xfff);
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // final carry-out is discarded
+out[0] = x43; // out[0] takes the last word of the chain, out[4] the first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e140m27/feopp.h b/src/Specific/montgomery32_2e140m27/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e140m27/fesub.c b/src/Specific/montgomery32_2e140m27/fesub.c
new file mode 100644
index 000000000..e3d1e0f7b
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtracts the limbs (x13, x15, x17, x19, x18) from (x5, x7, x9, x11, x10) with a borrow chain, then adds back masked constant limbs; writes five words to out.
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // borrow chain starts at the pair (x5, x13) and ends at (x10, x18)
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // mask derived from the final borrow x34; cmovznz is defined outside this file (presumably liblow.h) -- presumably 0 when x34 == 0, else 0xffffffff; confirm
+{ uint32_t x36 = (x35 & 0xffffffe5); // the constants 0xffffffe5, 0xffffffff (x3), 0xfff, read as 32-bit words least-significant first, equal 2^140 - 27
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint32_t x52 = (x35 & 0xfff);
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // final carry-out is discarded
+out[0] = x54; // out[0] takes the last word of the chain, out[4] the first
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e140m27/fesub.h b/src/Specific/montgomery32_2e140m27/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e140m27/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e141m9/feadd.c b/src/Specific/montgomery32_2e141m9/feadd.c
new file mode 100644
index 000000000..96afc57a3
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Adds the limbs (x5, x7, x9, x11, x10) and (x13, x15, x17, x19, x18) with a carry chain, subtracts constant limbs from the sum, and picks one of the two results per word via cmovznz; writes five words to out.
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // carry chain starts at the pair (x5, x13) and ends at (x10, x18)
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff7, &x36); // the constants 0xfffffff7, 0xffffffff (x3), 0x1fff, read as 32-bit words least-significant first, equal 2^141 - 9
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x1fff, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // borrow chain is extended through the addition's carry-out x34; only the borrow x52 is kept
+{ uint32_t x53 = cmovznz(x52, x48, x33); // cmovznz is defined outside this file (presumably liblow.h) -- presumably picks the subtracted word when x52 == 0, else the unsubtracted one; confirm
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // out[0] takes the word from the end of the chain, out[4] the one from the start
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e141m9/feadd.h b/src/Specific/montgomery32_2e141m9/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e141m9/femul.c b/src/Specific/montgomery32_2e141m9/femul.c
new file mode 100644
index 000000000..1b4a3b2c0
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Five rounds, one per limb x5, x7, x9, x11, x10: multiply the limb by (x13, x15, x17, x19, x18), add into the running words, then add a multiple of the constant limbs that zeroes the lowest word; ends with a conditional subtraction and writes five words to out.
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22); // round 1 (limb x5): _mulx_u32 returns the low product word and stores the high word through the pointer (Intel intrinsic)
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36); // each high word is added to the next product's low word
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48); // last carry x46 is folded into the top word as an addend
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0x38e38e39, &_); // 0x38e38e39 * 9 == 2^33 + 1, i.e. 1 mod 2^32; only the low word of the product is kept
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff7, &x55); // constant limbs 0xfffffff7, 0xffffffff (x3), 0x1fff, least-significant first, equal 2^141 - 9
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x1fff, &x67);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_); // x21 + x54 is 0 mod 2^32 by construction of x51, so the sum word is dropped and only the carry x85 continues
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103); // round 2 (limb x7)
+{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132); // add this round's products to the words carried over from round 1
+{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0x38e38e39, &_);
+{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff7, &x154);
+{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x1fff, &x166);
+{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148); // sum of the two carry-outs becomes the extra top word for the next round
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203); // round 3 (limb x9)
+{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0x38e38e39, &_);
+{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff7, &x254);
+{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x1fff, &x266);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303); // round 4 (limb x11)
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0x38e38e39, &_);
+{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff7, &x354);
+{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x1fff, &x366);
+{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403); // round 5 (limb x10)
+{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0x38e38e39, &_);
+{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff7, &x454);
+{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x1fff, &x466);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff7, &x502); // final step: subtract the constant limbs from the five result words
+{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x1fff, &x514);
+{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_); // borrow chain is extended through the top word x500; only the borrow x518 is kept
+{ uint32_t x519 = cmovznz(x518, x514, x498); // cmovznz is defined outside this file (presumably liblow.h) -- presumably picks the subtracted word when x518 == 0, else the unsubtracted one; confirm
+{ uint32_t x520 = cmovznz(x518, x511, x495);
+{ uint32_t x521 = cmovznz(x518, x508, x492);
+{ uint32_t x522 = cmovznz(x518, x505, x489);
+{ uint32_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519; // out[0] takes the word from the end of the chain, out[4] the one from the start
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e141m9/femul.h b/src/Specific/montgomery32_2e141m9/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e141m9/fenz.c b/src/Specific/montgomery32_2e141m9/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the five inputs together and stores the result in out[0].
+{ uint32_t x9 = (x8 | x7); // assigning to uint32_t keeps only the low 32 bits of each OR
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11);
+out[0] = x12; // nonzero iff some input has a bit set in its low 32 bits
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e141m9/fenz.h b/src/Specific/montgomery32_2e141m9/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e141m9/feopp.c b/src/Specific/montgomery32_2e141m9/feopp.c
new file mode 100644
index 000000000..be4e2fff3
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Subtracts the limbs (x2, x4, x6, x8, x7) from zero with a borrow chain, then adds back masked constant limbs; writes five words to out.
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // borrow chain starts at x2 and ends at x7
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // mask derived from the final borrow x23; cmovznz is defined outside this file (presumably liblow.h) -- presumably 0 when x23 == 0, else 0xffffffff; confirm
+{ uint32_t x25 = (x24 & 0xfffffff7); // the constants 0xfffffff7, 0xffffffff (x3), 0x1fff, read as 32-bit words least-significant first, equal 2^141 - 9
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint32_t x41 = (x24 & 0x1fff);
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // final carry-out is discarded
+out[0] = x43; // out[0] takes the last word of the chain, out[4] the first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e141m9/feopp.h b/src/Specific/montgomery32_2e141m9/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e141m9/fesub.c b/src/Specific/montgomery32_2e141m9/fesub.c
new file mode 100644
index 000000000..09a0effa4
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtracts the limbs (x13, x15, x17, x19, x18) from (x5, x7, x9, x11, x10) with a borrow chain, then adds back masked constant limbs; writes five words to out.
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // borrow chain starts at the pair (x5, x13) and ends at (x10, x18)
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // mask derived from the final borrow x34; cmovznz is defined outside this file (presumably liblow.h) -- presumably 0 when x34 == 0, else 0xffffffff; confirm
+{ uint32_t x36 = (x35 & 0xfffffff7); // the constants 0xfffffff7, 0xffffffff (x3), 0x1fff, read as 32-bit words least-significant first, equal 2^141 - 9
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint32_t x52 = (x35 & 0x1fff);
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // final carry-out is discarded
+out[0] = x54; // out[0] takes the last word of the chain, out[4] the first
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e141m9/fesub.h b/src/Specific/montgomery32_2e141m9/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e141m9/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e150m3/feadd.c b/src/Specific/montgomery32_2e150m3/feadd.c
new file mode 100644
index 000000000..6d48d8e63
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Adds the limbs (x5, x7, x9, x11, x10) and (x13, x15, x17, x19, x18) with a carry chain, subtracts constant limbs from the sum, and picks one of the two results per word via cmovznz; writes five words to out.
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // carry chain starts at the pair (x5, x13) and ends at (x10, x18)
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffd, &x36); // the constants 0xfffffffd, 0xffffffff (x3), 0x3fffff, read as 32-bit words least-significant first, equal 2^150 - 3
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffff, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // borrow chain is extended through the addition's carry-out x34; only the borrow x52 is kept
+{ uint32_t x53 = cmovznz(x52, x48, x33); // cmovznz is defined outside this file (presumably liblow.h) -- presumably picks the subtracted word when x52 == 0, else the unsubtracted one; confirm
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // out[0] takes the word from the end of the chain, out[4] the one from the start
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e150m3/feadd.h b/src/Specific/montgomery32_2e150m3/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e150m3/femul.c b/src/Specific/montgomery32_2e150m3/femul.c
new file mode 100644
index 000000000..c3749a6dd
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)  // word-by-word Montgomery-style multiply modulo m = 2^150 - 3: for each limb of a = (x5,x7,x9,x11,x10) add limb * b, with b = (x13,x15,x17,x19,x18), then add a multiple of m that zeroes the low word and drop that word; finish with one conditional subtraction of m
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);  // row 0: x5 * b; each _mulx_u32 returns the low word and stores the high word through the pointer
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);  // fold the high/low partial products into one six-word row (x21, x36, x39, x42, x45, x48)
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);  // the last carry x46 is added into the top high word; the carry out is discarded
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xaaaaaaab, &_);  // 3 * 0xaaaaaaab == 1 (mod 2^32), so x51 * 0xfffffffd == -x21 (mod 2^32)
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffd, &x55);  // x51 * m, limb by limb (m = 0xfffffffd, 0xffffffff x3, 0x3fffff)
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffff, &x67);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);  // the low word of the sum is thrown away (written to _); only its carry x85 continues
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);  // row 1: x7 * b
+{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);  // add row 1 to the running accumulator
+{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xaaaaaaab, &_);  // same reduction step as for row 0, now keyed on x132
+{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffffd, &x154);
+{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffff, &x166);
+{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148);  // sum of the two carry-outs of this round becomes the accumulator's top word
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);  // row 2: x9 * b
+{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xaaaaaaab, &_);
+{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffffd, &x254);
+{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffff, &x266);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);  // row 3: x11 * b
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xaaaaaaab, &_);
+{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffffd, &x354);
+{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffff, &x366);
+{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);  // row 4: x10 * b
+{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xaaaaaaab, &_);
+{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffffd, &x454);
+{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffff, &x466);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffffd, &x502);  // trial subtraction of m from the five result words plus the top word x500
+{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffff, &x514);
+{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
+{ uint32_t x519 = cmovznz(x518, x514, x498);  // cmovznz is declared in liblow.h (not shown here); presumably yields the 2nd argument when x518 == 0 and the 3rd otherwise - TODO confirm
+{ uint32_t x520 = cmovznz(x518, x511, x495);
+{ uint32_t x521 = cmovznz(x518, x508, x492);
+{ uint32_t x522 = cmovznz(x518, x505, x489);
+{ uint32_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519;  // out[0] receives the limb from the top of the carry chain, out[4] the one from the bottom
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e150m3/femul.h b/src/Specific/montgomery32_2e150m3/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);  // prototype only; force_inline expands to __attribute__((always_inline)); the matching femul.c notes that callers pass uint64_t out[5]
diff --git a/src/Specific/montgomery32_2e150m3/fenz.c b/src/Specific/montgomery32_2e150m3/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // writes to out[0] the bitwise OR of the five inputs, truncated to 32 bits
+{ uint32_t x9 = (x8 | x7);  // each step stores into a uint32_t, so only the low 32 bits of every input contribute
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11);
+out[0] = x12;  // nonzero exactly when some input has a nonzero bit among its low 32 bits
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e150m3/fenz.h b/src/Specific/montgomery32_2e150m3/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // prototype only; force_inline expands to __attribute__((always_inline)); the matching fenz.c notes that callers pass uint64_t out[1]
diff --git a/src/Specific/montgomery32_2e150m3/feopp.c b/src/Specific/montgomery32_2e150m3/feopp.c
new file mode 100644
index 000000000..d4dec9d6d
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // out = 0 - a, with m = 2^150 - 3 added back when the subtraction borrows; a = (x2,x4,x6,x8,x7), borrow chain runs from x2 up to x7
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10);  // limb-wise 0 - a with borrow; the uint64_t arguments are narrowed to 32 bits at the intrinsic call
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22);
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff);  // mask from the final borrow; cmovznz is declared in liblow.h (not shown here), presumably 0 when x23 == 0 and 0xffffffff otherwise - TODO confirm
+{ uint32_t x25 = (x24 & 0xfffffffd);  // masked limbs of m (0xfffffffd, 0xffffffff x3, 0x3fffff) are added back below
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint32_t x41 = (x24 & 0x3fffff);
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43);  // the final carry is discarded
+out[0] = x43;  // out[0] receives the limb from the top of the carry chain, out[4] the one from the bottom
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e150m3/feopp.h b/src/Specific/montgomery32_2e150m3/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // prototype only; force_inline expands to __attribute__((always_inline)); the matching feopp.c notes that callers pass uint64_t out[5]
diff --git a/src/Specific/montgomery32_2e150m3/fesub.c b/src/Specific/montgomery32_2e150m3/fesub.c
new file mode 100644
index 000000000..7039f7ecd
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)  // out = a - b, with m = 2^150 - 3 added back when the subtraction borrows; a = (x5,x7,x9,x11,x10), b = (x13,x15,x17,x19,x18), borrow chain runs from x5/x13 up to x10/x18
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21);  // limb-wise subtract with borrow; the uint64_t arguments are narrowed to 32 bits at the intrinsic call
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33);
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff);  // mask from the final borrow; cmovznz is declared in liblow.h (not shown here), presumably 0 when x34 == 0 and 0xffffffff otherwise - TODO confirm
+{ uint32_t x36 = (x35 & 0xfffffffd);  // masked limbs of m (0xfffffffd, 0xffffffff x3, 0x3fffff) are added back below
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint32_t x52 = (x35 & 0x3fffff);
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54);  // the final carry is discarded
+out[0] = x54;  // out[0] receives the limb from the top of the carry chain, out[4] the one from the bottom
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e150m3/fesub.h b/src/Specific/montgomery32_2e150m3/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m3/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);  // prototype only; force_inline expands to __attribute__((always_inline)); the matching fesub.c notes that callers pass uint64_t out[5]
diff --git a/src/Specific/montgomery32_2e150m5/feadd.c b/src/Specific/montgomery32_2e150m5/feadd.c
new file mode 100644
index 000000000..f0c527d11
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)  // out = a + b followed by one conditional subtraction of m = 2^150 - 5; a = (x5,x7,x9,x11,x10), b = (x13,x15,x17,x19,x18), carry chains run from x5/x13 up to x10/x18
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21);  // limb-wise add with carry; the uint64_t arguments are narrowed to 32 bits at the intrinsic call
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33);  // x34 is the carry out of the top limb
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffffb, &x36);  // trial subtraction of m; its limbs are 0xfffffffb, 0xffffffff x3, 0x3fffff
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffff, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_);  // borrow chain extended through the add carry x34; only the final borrow x52 is used
+{ uint32_t x53 = cmovznz(x52, x48, x33);  // cmovznz is declared in liblow.h (not shown here); presumably yields the 2nd argument when x52 == 0 and the 3rd otherwise - TODO confirm
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53;  // out[0] receives the limb from the top of the carry chain, out[4] the one from the bottom
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e150m5/feadd.h b/src/Specific/montgomery32_2e150m5/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);  // prototype only; force_inline expands to __attribute__((always_inline)); the matching feadd.c notes that callers pass uint64_t out[5]
diff --git a/src/Specific/montgomery32_2e150m5/femul.c b/src/Specific/montgomery32_2e150m5/femul.c
new file mode 100644
index 000000000..b0134aeff
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)  // word-by-word Montgomery-style multiply modulo m = 2^150 - 5: for each limb of a = (x5,x7,x9,x11,x10) add limb * b, with b = (x13,x15,x17,x19,x18), then add a multiple of m that zeroes the low word and drop that word; finish with one conditional subtraction of m
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22);  // row 0: x5 * b; each _mulx_u32 returns the low word and stores the high word through the pointer
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36);  // fold the high/low partial products into one six-word row (x21, x36, x39, x42, x45, x48)
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48);  // the last carry x46 is added into the top high word; the carry out is discarded
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xcccccccd, &_);  // 5 * 0xcccccccd == 1 (mod 2^32), so x51 * 0xfffffffb == -x21 (mod 2^32)
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffffb, &x55);  // x51 * m, limb by limb (m = 0xfffffffb, 0xffffffff x3, 0x3fffff)
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffff, &x67);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_);  // the low word of the sum is thrown away (written to _); only its carry x85 continues
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87);
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103);  // row 1: x7 * b
+{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132);  // add row 1 to the running accumulator
+{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xcccccccd, &_);  // same reduction step as for row 0, now keyed on x132
+{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffffb, &x154);
+{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffff, &x166);
+{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_);
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148);  // sum of the two carry-outs of this round becomes the accumulator's top word
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203);  // row 2: x9 * b
+{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xcccccccd, &_);
+{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffffb, &x254);
+{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffff, &x266);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303);  // row 3: x11 * b
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xcccccccd, &_);
+{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffffb, &x354);
+{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffff, &x366);
+{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403);  // row 4: x10 * b
+{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xcccccccd, &_);
+{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffffb, &x454);
+{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffff, &x466);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffffb, &x502);  // trial subtraction of m from the five result words plus the top word x500
+{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffff, &x514);
+{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_);
+{ uint32_t x519 = cmovznz(x518, x514, x498);  // cmovznz is declared in liblow.h (not shown here); presumably yields the 2nd argument when x518 == 0 and the 3rd otherwise - TODO confirm
+{ uint32_t x520 = cmovznz(x518, x511, x495);
+{ uint32_t x521 = cmovznz(x518, x508, x492);
+{ uint32_t x522 = cmovznz(x518, x505, x489);
+{ uint32_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519;  // out[0] receives the limb from the top of the carry chain, out[4] the one from the bottom
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e150m5/femul.h b/src/Specific/montgomery32_2e150m5/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e150m5/fenz.c b/src/Specific/montgomery32_2e150m5/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: stores the bitwise OR of the five inputs, narrowed to 32 bits, in out[0].
+{ uint32_t x9 = (x8 | x7); // the 64-bit OR is narrowed to uint32_t here; NOTE(review): presumably every input already fits in 32 bits -- confirm against callers
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11); // x12 == 0 exactly when the low 32 bits of all five inputs are zero
+out[0] = x12; // only out[0] is written
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e150m5/fenz.h b/src/Specific/montgomery32_2e150m5/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e150m5/feopp.c b/src/Specific/montgomery32_2e150m5/feopp.c
new file mode 100644
index 000000000..e4f714108
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x2,x4,x6,x8,x7) over five 32-bit limbs, then adds the masked constant limbs below when that subtraction borrowed.
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // borrow chain starts at x2 (no borrow in) and ends at x7, so x2 is the least significant limb
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22); // x23 is the borrow out of the top limb
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // mask; cmovznz comes from liblow.h (not shown) -- presumably yields 0x0 when x23 == 0 and 0xffffffff otherwise, confirm there
+{ uint32_t x25 = (x24 & 0xfffffffb); // the masked constants 0xfffffffb, 0xffffffff (three times), 0x3fffff are the 32-bit limbs of 2^150 - 5, low to high
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27); // carry chain adds the masked limbs to the difference, low limb first
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint32_t x41 = (x24 & 0x3fffff);
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // the carry out of the top limb is deliberately dropped (bound to _)
+out[0] = x43; // out[] is filled from the top limb (x43) down to the bottom limb (x27)
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e150m5/feopp.h b/src/Specific/montgomery32_2e150m5/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e150m5/fesub.c b/src/Specific/montgomery32_2e150m5/fesub.c
new file mode 100644
index 000000000..4236fbd26
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtraction: (x5,x7,x9,x11,x10) - (x13,x15,x17,x19,x18) limb by limb, then adds the masked constant limbs below when the subtraction borrowed.
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // borrow chain starts at x5/x13 (no borrow in) and ends at x10/x18, so x5 and x13 are the least significant limbs
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); // x34 is the borrow out of the top limb
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // mask; cmovznz comes from liblow.h (not shown) -- presumably yields 0x0 when x34 == 0 and 0xffffffff otherwise, confirm there
+{ uint32_t x36 = (x35 & 0xfffffffb); // the masked constants 0xfffffffb, 0xffffffff (three times), 0x3fffff are the 32-bit limbs of 2^150 - 5, low to high
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38); // carry chain adds the masked limbs to the difference, low limb first
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint32_t x52 = (x35 & 0x3fffff);
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // the carry out of the top limb is deliberately dropped (bound to _)
+out[0] = x54; // out[] is filled from the top limb (x54) down to the bottom limb (x38)
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e150m5/fesub.h b/src/Specific/montgomery32_2e150m5/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e150m5/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e152m17/feadd.c b/src/Specific/montgomery32_2e152m17/feadd.c
new file mode 100644
index 000000000..92a4e0d5f
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Addition: (x5,x7,x9,x11,x10) + (x13,x15,x17,x19,x18) limb by limb, followed by one trial subtraction of the constant limbs below and a select between the two results.
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // carry chain starts at x5/x13 (no carry in) and ends at x10/x18, so x5 and x13 are the least significant limbs
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); // x34 is the carry out of the top limb
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xffffffef, &x36); // trial subtraction of 0xffffffef, 0xffffffff (three times), 0xffffff: the 32-bit limbs of 2^152 - 17, low to high
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0xffffff, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // the borrow is taken from the addition's carry x34; only the final borrow x52 is kept, the difference word is discarded
+{ uint32_t x53 = cmovznz(x52, x48, x33); // cmovznz comes from liblow.h (not shown) -- presumably returns its 2nd argument (the subtracted limb) when x52 == 0 and its 3rd (the plain sum limb) otherwise, confirm there
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // out[] is filled from the top limb (x53) down to the bottom limb (x57)
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e152m17/feadd.h b/src/Specific/montgomery32_2e152m17/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e152m17/femul.c b/src/Specific/montgomery32_2e152m17/femul.c
new file mode 100644
index 000000000..6b1678812
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Word-by-word Montgomery-style multiplication (cf. the montgomery32_* directory name): five rounds, one per limb x5, x7, x9, x11, x10, each adding limb * (x13,x15,x17,x19,x18) and then a multiple of the constant limbs that cancels the low word; finishes with one trial subtraction and a select.
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22); // round 1 (limb x5): _mulx_u32 returns the low product word (x21) and stores the high word (x22)
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36); // stitch the partial products into one row: each high word is added to the next low word
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48); // top word of the row: last high word plus the pending carry x46; the carry out is dropped
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xf0f0f0f1, &_); // reduction factor: low word of x21 * 0xf0f0f0f1; since 0xf0f0f0f1 * 17 == 2^36 + 1, the constant is the inverse of 17 mod 2^32, and 0xffffffef == -17 mod 2^32
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xffffffef, &x55); // x51 times 0xffffffef, 0xffffffff (three times), 0xffffff: the 32-bit limbs of 2^152 - 17, low to high
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0xffffff, &x67);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_); // x54 == -x21 mod 2^32 by the choice of x51, so this sum word is zero and is discarded; only its carry x85 is kept
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87); // remaining words of (product row + reduction row); these become the accumulator for the next round, one word lower
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99); // x100 is the carry out of round 1
+{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103); // round 2 (limb x7): same pattern as round 1
+{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132); // add this product row to the accumulator words x87..x99 and carry x100 left by round 1
+{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xf0f0f0f1, &_); // reduction factor for round 2, derived from the new low word x132
+{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xffffffef, &x154);
+{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0xffffff, &x166);
+{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_); // low word cancels again; only the carry x184 is kept
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148); // top accumulator word for the next round: the two carry-outs of this round added together
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203); // round 3 (limb x9)
+{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232); // add this product row to the accumulator x186..x198, x200 left by round 2
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xf0f0f0f1, &_); // reduction factor for round 3
+{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xffffffef, &x254);
+{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0xffffff, &x266);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_); // low word cancels; only the carry x284 is kept
+{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248); // top accumulator word for the next round
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303); // round 4 (limb x11)
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332); // add this product row to the accumulator x286..x298, x300 left by round 3
+{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xf0f0f0f1, &_); // reduction factor for round 4
+{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xffffffef, &x354);
+{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0xffffff, &x366);
+{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_); // low word cancels; only the carry x384 is kept
+{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348); // top accumulator word for the next round
+{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403); // round 5 (limb x10, the last one)
+{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432); // add this product row to the accumulator x386..x398, x400 left by round 4
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xf0f0f0f1, &_); // reduction factor for round 5
+{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xffffffef, &x454);
+{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0xffffff, &x466);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_); // low word cancels; only the carry x484 is kept
+{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448); // overflow word above the five result limbs x486..x498
+{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xffffffef, &x502); // trial subtraction of the constant limbs (2^152 - 17) from x486..x498, low limb first
+{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0xffffff, &x514);
+{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_); // the borrow is taken from the overflow word x500; only the final borrow x518 is kept
+{ uint32_t x519 = cmovznz(x518, x514, x498); // cmovznz comes from liblow.h (not shown) -- presumably returns its 2nd argument (the subtracted limb) when x518 == 0 and its 3rd (the unsubtracted limb) otherwise, confirm there
+{ uint32_t x520 = cmovznz(x518, x511, x495);
+{ uint32_t x521 = cmovznz(x518, x508, x492);
+{ uint32_t x522 = cmovznz(x518, x505, x489);
+{ uint32_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519; // out[] is filled from the top limb (x519) down to the bottom limb (x523)
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e152m17/femul.h b/src/Specific/montgomery32_2e152m17/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e152m17/fenz.c b/src/Specific/montgomery32_2e152m17/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: stores the bitwise OR of the five inputs, narrowed to 32 bits, in out[0].
+{ uint32_t x9 = (x8 | x7); // the 64-bit OR is narrowed to uint32_t here; NOTE(review): presumably every input already fits in 32 bits -- confirm against callers
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11); // x12 == 0 exactly when the low 32 bits of all five inputs are zero
+out[0] = x12; // only out[0] is written
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e152m17/fenz.h b/src/Specific/montgomery32_2e152m17/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e152m17/feopp.c b/src/Specific/montgomery32_2e152m17/feopp.c
new file mode 100644
index 000000000..095aad6db
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x2,x4,x6,x8,x7) over five 32-bit limbs, then adds the masked constant limbs below when that subtraction borrowed.
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // borrow chain starts at x2 (no borrow in) and ends at x7, so x2 is the least significant limb
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22); // x23 is the borrow out of the top limb
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // mask; cmovznz comes from liblow.h (not shown) -- presumably yields 0x0 when x23 == 0 and 0xffffffff otherwise, confirm there
+{ uint32_t x25 = (x24 & 0xffffffef); // the masked constants 0xffffffef, 0xffffffff (three times), 0xffffff are the 32-bit limbs of 2^152 - 17, low to high
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27); // carry chain adds the masked limbs to the difference, low limb first
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint32_t x41 = (x24 & 0xffffff);
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // the carry out of the top limb is deliberately dropped (bound to _)
+out[0] = x43; // out[] is filled from the top limb (x43) down to the bottom limb (x27)
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e152m17/feopp.h b/src/Specific/montgomery32_2e152m17/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e152m17/fesub.c b/src/Specific/montgomery32_2e152m17/fesub.c
new file mode 100644
index 000000000..bf1970d94
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtraction: (x5,x7,x9,x11,x10) - (x13,x15,x17,x19,x18) limb by limb, then adds the masked constant limbs below when the subtraction borrowed.
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // borrow chain starts at x5/x13 (no borrow in) and ends at x10/x18, so x5 and x13 are the least significant limbs
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); // x34 is the borrow out of the top limb
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // mask; cmovznz comes from liblow.h (not shown) -- presumably yields 0x0 when x34 == 0 and 0xffffffff otherwise, confirm there
+{ uint32_t x36 = (x35 & 0xffffffef); // the masked constants 0xffffffef, 0xffffffff (three times), 0xffffff are the 32-bit limbs of 2^152 - 17, low to high
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38); // carry chain adds the masked limbs to the difference, low limb first
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint32_t x52 = (x35 & 0xffffff);
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // the carry out of the top limb is deliberately dropped (bound to _)
+out[0] = x54; // out[] is filled from the top limb (x54) down to the bottom limb (x38)
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e152m17/fesub.h b/src/Specific/montgomery32_2e152m17/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e152m17/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e158m15/feadd.c b/src/Specific/montgomery32_2e158m15/feadd.c
new file mode 100644
index 000000000..c28d5c418
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Addition: (x5,x7,x9,x11,x10) + (x13,x15,x17,x19,x18) limb by limb, followed by one trial subtraction of the constant limbs below and a select between the two results.
+{ uint32_t x21; uint8_t x22 = _addcarryx_u32(0x0, x5, x13, &x21); // carry chain starts at x5/x13 (no carry in) and ends at x10/x18, so x5 and x13 are the least significant limbs
+{ uint32_t x24; uint8_t x25 = _addcarryx_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _addcarryx_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(x31, x10, x18, &x33); // x34 is the carry out of the top limb
+{ uint32_t x36; uint8_t x37 = _subborrow_u32(0x0, x21, 0xfffffff1, &x36); // trial subtraction of 0xfffffff1, 0xffffffff (three times), 0x3fffffff: the 32-bit limbs of 2^158 - 15, low to high
+{ uint32_t x39; uint8_t x40 = _subborrow_u32(x37, x24, 0xffffffff, &x39);
+{ uint32_t x42; uint8_t x43 = _subborrow_u32(x40, x27, 0xffffffff, &x42);
+{ uint32_t x45; uint8_t x46 = _subborrow_u32(x43, x30, 0xffffffff, &x45);
+{ uint32_t x48; uint8_t x49 = _subborrow_u32(x46, x33, 0x3fffffff, &x48);
+{ uint32_t _; uint8_t x52 = _subborrow_u32(x49, x34, 0x0, &_); // the borrow is taken from the addition's carry x34; only the final borrow x52 is kept, the difference word is discarded
+{ uint32_t x53 = cmovznz(x52, x48, x33); // cmovznz comes from liblow.h (not shown) -- presumably returns its 2nd argument (the subtracted limb) when x52 == 0 and its 3rd (the plain sum limb) otherwise, confirm there
+{ uint32_t x54 = cmovznz(x52, x45, x30);
+{ uint32_t x55 = cmovznz(x52, x42, x27);
+{ uint32_t x56 = cmovznz(x52, x39, x24);
+{ uint32_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // out[] is filled from the top limb (x53) down to the bottom limb (x57)
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57;
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e158m15/feadd.h b/src/Specific/montgomery32_2e158m15/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery32_2e158m15/femul.c b/src/Specific/montgomery32_2e158m15/femul.c
new file mode 100644
index 000000000..fd32df5b8
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // 5x32-bit limb multiply with interleaved reduction; first operand limbs x5,x7,x9,x11,x10 (lowest first), second operand limbs x13,x15,x17,x19,x18 (lowest first)
+{ uint32_t x22; uint32_t x21 = _mulx_u32(x5, x13, &x22); // round 1: x5 times each limb of the second operand (value returned = low half, pointer argument = high half, as used below)
+{ uint32_t x25; uint32_t x24 = _mulx_u32(x5, x15, &x25);
+{ uint32_t x28; uint32_t x27 = _mulx_u32(x5, x17, &x28);
+{ uint32_t x31; uint32_t x30 = _mulx_u32(x5, x19, &x31);
+{ uint32_t x34; uint32_t x33 = _mulx_u32(x5, x18, &x34);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(0x0, x22, x24, &x36); // chain each high half into the next low half
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x25, x27, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x28, x30, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x31, x33, &x45);
+{ uint32_t x48; uint8_t _ = _addcarryx_u32(0x0, x46, x34, &x48); // top word = pending carry + last high half
+{ uint32_t _; uint32_t x51 = _mulx_u32(x21, 0xeeeeeeef, &_); // x51 = low 32 bits of x21 * 0xeeeeeeef; 0xeeeeeeef * 0xfffffff1 == -1 (mod 2^32)
+{ uint32_t x55; uint32_t x54 = _mulx_u32(x51, 0xfffffff1, &x55); // x51 times the constant limbs 0xfffffff1, 0xffffffff x3, 0x3fffffff
+{ uint32_t x58; uint32_t x57 = _mulx_u32(x51, 0xffffffff, &x58);
+{ uint32_t x61; uint32_t x60 = _mulx_u32(x51, 0xffffffff, &x61);
+{ uint32_t x64; uint32_t x63 = _mulx_u32(x51, 0xffffffff, &x64);
+{ uint32_t x67; uint32_t x66 = _mulx_u32(x51, 0x3fffffff, &x67);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(0x0, x55, x57, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x58, x60, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x61, x63, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x64, x66, &x78);
+{ uint32_t x81; uint8_t _ = _addcarryx_u32(0x0, x79, x67, &x81);
+{ uint32_t _; uint8_t x85 = _addcarryx_u32(0x0, x21, x54, &_); // lowest word of the sum is discarded; only its carry x85 is kept
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x36, x69, &x87); // remaining words become the running value x87..x99 plus carry x100
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x39, x72, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x42, x75, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x45, x78, &x96);
+{ uint32_t x99; uint8_t x100 = _addcarryx_u32(x97, x48, x81, &x99);
+{ uint32_t x103; uint32_t x102 = _mulx_u32(x7, x13, &x103); // round 2: same pattern with x7
+{ uint32_t x106; uint32_t x105 = _mulx_u32(x7, x15, &x106);
+{ uint32_t x109; uint32_t x108 = _mulx_u32(x7, x17, &x109);
+{ uint32_t x112; uint32_t x111 = _mulx_u32(x7, x19, &x112);
+{ uint32_t x115; uint32_t x114 = _mulx_u32(x7, x18, &x115);
+{ uint32_t x117; uint8_t x118 = _addcarryx_u32(0x0, x103, x105, &x117);
+{ uint32_t x120; uint8_t x121 = _addcarryx_u32(x118, x106, x108, &x120);
+{ uint32_t x123; uint8_t x124 = _addcarryx_u32(x121, x109, x111, &x123);
+{ uint32_t x126; uint8_t x127 = _addcarryx_u32(x124, x112, x114, &x126);
+{ uint32_t x129; uint8_t _ = _addcarryx_u32(0x0, x127, x115, &x129);
+{ uint32_t x132; uint8_t x133 = _addcarryx_u32(0x0, x87, x102, &x132); // add this round's product to the running value
+{ uint32_t x135; uint8_t x136 = _addcarryx_u32(x133, x90, x117, &x135);
+{ uint32_t x138; uint8_t x139 = _addcarryx_u32(x136, x93, x120, &x138);
+{ uint32_t x141; uint8_t x142 = _addcarryx_u32(x139, x96, x123, &x141);
+{ uint32_t x144; uint8_t x145 = _addcarryx_u32(x142, x99, x126, &x144);
+{ uint32_t x147; uint8_t x148 = _addcarryx_u32(x145, x100, x129, &x147);
+{ uint32_t _; uint32_t x150 = _mulx_u32(x132, 0xeeeeeeef, &_); // multiplier that cancels the lowest word x132
+{ uint32_t x154; uint32_t x153 = _mulx_u32(x150, 0xfffffff1, &x154);
+{ uint32_t x157; uint32_t x156 = _mulx_u32(x150, 0xffffffff, &x157);
+{ uint32_t x160; uint32_t x159 = _mulx_u32(x150, 0xffffffff, &x160);
+{ uint32_t x163; uint32_t x162 = _mulx_u32(x150, 0xffffffff, &x163);
+{ uint32_t x166; uint32_t x165 = _mulx_u32(x150, 0x3fffffff, &x166);
+{ uint32_t x168; uint8_t x169 = _addcarryx_u32(0x0, x154, x156, &x168);
+{ uint32_t x171; uint8_t x172 = _addcarryx_u32(x169, x157, x159, &x171);
+{ uint32_t x174; uint8_t x175 = _addcarryx_u32(x172, x160, x162, &x174);
+{ uint32_t x177; uint8_t x178 = _addcarryx_u32(x175, x163, x165, &x177);
+{ uint32_t x180; uint8_t _ = _addcarryx_u32(0x0, x178, x166, &x180);
+{ uint32_t _; uint8_t x184 = _addcarryx_u32(0x0, x132, x153, &_); // lowest word discarded again, carry kept
+{ uint32_t x186; uint8_t x187 = _addcarryx_u32(x184, x135, x168, &x186);
+{ uint32_t x189; uint8_t x190 = _addcarryx_u32(x187, x138, x171, &x189);
+{ uint32_t x192; uint8_t x193 = _addcarryx_u32(x190, x141, x174, &x192);
+{ uint32_t x195; uint8_t x196 = _addcarryx_u32(x193, x144, x177, &x195);
+{ uint32_t x198; uint8_t x199 = _addcarryx_u32(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148); // word above x198: sum of the two carry-outs of this round
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x9, x13, &x203); // round 3: same pattern with x9
+{ uint32_t x206; uint32_t x205 = _mulx_u32(x9, x15, &x206);
+{ uint32_t x209; uint32_t x208 = _mulx_u32(x9, x17, &x209);
+{ uint32_t x212; uint32_t x211 = _mulx_u32(x9, x19, &x212);
+{ uint32_t x215; uint32_t x214 = _mulx_u32(x9, x18, &x215);
+{ uint32_t x217; uint8_t x218 = _addcarryx_u32(0x0, x203, x205, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x206, x208, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x209, x211, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x212, x214, &x226);
+{ uint32_t x229; uint8_t _ = _addcarryx_u32(0x0, x227, x215, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(0x0, x186, x202, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x189, x217, &x235);
+{ uint32_t x238; uint8_t x239 = _addcarryx_u32(x236, x192, x220, &x238);
+{ uint32_t x241; uint8_t x242 = _addcarryx_u32(x239, x195, x223, &x241);
+{ uint32_t x244; uint8_t x245 = _addcarryx_u32(x242, x198, x226, &x244);
+{ uint32_t x247; uint8_t x248 = _addcarryx_u32(x245, x200, x229, &x247);
+{ uint32_t _; uint32_t x250 = _mulx_u32(x232, 0xeeeeeeef, &_);
+{ uint32_t x254; uint32_t x253 = _mulx_u32(x250, 0xfffffff1, &x254);
+{ uint32_t x257; uint32_t x256 = _mulx_u32(x250, 0xffffffff, &x257);
+{ uint32_t x260; uint32_t x259 = _mulx_u32(x250, 0xffffffff, &x260);
+{ uint32_t x263; uint32_t x262 = _mulx_u32(x250, 0xffffffff, &x263);
+{ uint32_t x266; uint32_t x265 = _mulx_u32(x250, 0x3fffffff, &x266);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(0x0, x254, x256, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x257, x259, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x260, x262, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x263, x265, &x277);
+{ uint32_t x280; uint8_t _ = _addcarryx_u32(0x0, x278, x266, &x280);
+{ uint32_t _; uint8_t x284 = _addcarryx_u32(0x0, x232, x253, &_);
+{ uint32_t x286; uint8_t x287 = _addcarryx_u32(x284, x235, x268, &x286);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x238, x271, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x241, x274, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x244, x277, &x295);
+{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x11, x13, &x303); // round 4: same pattern with x11
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x11, x15, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x11, x17, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x11, x19, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x11, x18, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x303, x305, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x306, x308, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x309, x311, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x312, x314, &x326);
+{ uint32_t x329; uint8_t _ = _addcarryx_u32(0x0, x327, x315, &x329);
+{ uint32_t x332; uint8_t x333 = _addcarryx_u32(0x0, x286, x302, &x332);
+{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x289, x317, &x335);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x292, x320, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x295, x323, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x298, x326, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x300, x329, &x347);
+{ uint32_t _; uint32_t x350 = _mulx_u32(x332, 0xeeeeeeef, &_);
+{ uint32_t x354; uint32_t x353 = _mulx_u32(x350, 0xfffffff1, &x354);
+{ uint32_t x357; uint32_t x356 = _mulx_u32(x350, 0xffffffff, &x357);
+{ uint32_t x360; uint32_t x359 = _mulx_u32(x350, 0xffffffff, &x360);
+{ uint32_t x363; uint32_t x362 = _mulx_u32(x350, 0xffffffff, &x363);
+{ uint32_t x366; uint32_t x365 = _mulx_u32(x350, 0x3fffffff, &x366);
+{ uint32_t x368; uint8_t x369 = _addcarryx_u32(0x0, x354, x356, &x368);
+{ uint32_t x371; uint8_t x372 = _addcarryx_u32(x369, x357, x359, &x371);
+{ uint32_t x374; uint8_t x375 = _addcarryx_u32(x372, x360, x362, &x374);
+{ uint32_t x377; uint8_t x378 = _addcarryx_u32(x375, x363, x365, &x377);
+{ uint32_t x380; uint8_t _ = _addcarryx_u32(0x0, x378, x366, &x380);
+{ uint32_t _; uint8_t x384 = _addcarryx_u32(0x0, x332, x353, &_);
+{ uint32_t x386; uint8_t x387 = _addcarryx_u32(x384, x335, x368, &x386);
+{ uint32_t x389; uint8_t x390 = _addcarryx_u32(x387, x338, x371, &x389);
+{ uint32_t x392; uint8_t x393 = _addcarryx_u32(x390, x341, x374, &x392);
+{ uint32_t x395; uint8_t x396 = _addcarryx_u32(x393, x344, x377, &x395);
+{ uint32_t x398; uint8_t x399 = _addcarryx_u32(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint32_t x403; uint32_t x402 = _mulx_u32(x10, x13, &x403); // round 5: same pattern with x10, the last limb of the first operand
+{ uint32_t x406; uint32_t x405 = _mulx_u32(x10, x15, &x406);
+{ uint32_t x409; uint32_t x408 = _mulx_u32(x10, x17, &x409);
+{ uint32_t x412; uint32_t x411 = _mulx_u32(x10, x19, &x412);
+{ uint32_t x415; uint32_t x414 = _mulx_u32(x10, x18, &x415);
+{ uint32_t x417; uint8_t x418 = _addcarryx_u32(0x0, x403, x405, &x417);
+{ uint32_t x420; uint8_t x421 = _addcarryx_u32(x418, x406, x408, &x420);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(x421, x409, x411, &x423);
+{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x412, x414, &x426);
+{ uint32_t x429; uint8_t _ = _addcarryx_u32(0x0, x427, x415, &x429);
+{ uint32_t x432; uint8_t x433 = _addcarryx_u32(0x0, x386, x402, &x432);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x389, x417, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x392, x420, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x395, x423, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x398, x426, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x400, x429, &x447);
+{ uint32_t _; uint32_t x450 = _mulx_u32(x432, 0xeeeeeeef, &_);
+{ uint32_t x454; uint32_t x453 = _mulx_u32(x450, 0xfffffff1, &x454);
+{ uint32_t x457; uint32_t x456 = _mulx_u32(x450, 0xffffffff, &x457);
+{ uint32_t x460; uint32_t x459 = _mulx_u32(x450, 0xffffffff, &x460);
+{ uint32_t x463; uint32_t x462 = _mulx_u32(x450, 0xffffffff, &x463);
+{ uint32_t x466; uint32_t x465 = _mulx_u32(x450, 0x3fffffff, &x466);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(0x0, x454, x456, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x457, x459, &x471);
+{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x460, x462, &x474);
+{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x463, x465, &x477);
+{ uint32_t x480; uint8_t _ = _addcarryx_u32(0x0, x478, x466, &x480);
+{ uint32_t _; uint8_t x484 = _addcarryx_u32(0x0, x432, x453, &_);
+{ uint32_t x486; uint8_t x487 = _addcarryx_u32(x484, x435, x468, &x486);
+{ uint32_t x489; uint8_t x490 = _addcarryx_u32(x487, x438, x471, &x489);
+{ uint32_t x492; uint8_t x493 = _addcarryx_u32(x490, x441, x474, &x492);
+{ uint32_t x495; uint8_t x496 = _addcarryx_u32(x493, x444, x477, &x495);
+{ uint32_t x498; uint8_t x499 = _addcarryx_u32(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint32_t x502; uint8_t x503 = _subborrow_u32(0x0, x486, 0xfffffff1, &x502); // trial subtraction of 0x3fffffff_ffffffff_ffffffff_ffffffff_fffffff1 from x486..x498
+{ uint32_t x505; uint8_t x506 = _subborrow_u32(x503, x489, 0xffffffff, &x505);
+{ uint32_t x508; uint8_t x509 = _subborrow_u32(x506, x492, 0xffffffff, &x508);
+{ uint32_t x511; uint8_t x512 = _subborrow_u32(x509, x495, 0xffffffff, &x511);
+{ uint32_t x514; uint8_t x515 = _subborrow_u32(x512, x498, 0x3fffffff, &x514);
+{ uint32_t _; uint8_t x518 = _subborrow_u32(x515, x500, 0x0, &_); // x518 = final borrow once the extra top word x500 is included
+{ uint32_t x519 = cmovznz(x518, x514, x498); // NOTE(review): cmovznz comes from outside this file (liblow.h?); presumably 2nd argument when x518 == 0, else 3rd - confirm
+{ uint32_t x520 = cmovznz(x518, x511, x495);
+{ uint32_t x521 = cmovznz(x518, x508, x492);
+{ uint32_t x522 = cmovznz(x518, x505, x489);
+{ uint32_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519; // out[0] holds the most significant limb
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523; // out[4] holds the least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e158m15/femul.h b/src/Specific/montgomery32_2e158m15/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // out: 5 words; in femul.c x5 and x13 are the least significant limbs of the two operands
diff --git a/src/Specific/montgomery32_2e158m15/fenz.c b/src/Specific/montgomery32_2e158m15/fenz.c
new file mode 100644
index 000000000..755695e18
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the five input words together into a single word
+{ uint32_t x9 = (x8 | x7); // stored as uint32_t, so only the low 32 bits of each input contribute
+{ uint32_t x10 = (x6 | x9);
+{ uint32_t x11 = (x4 | x10);
+{ uint32_t x12 = (x2 | x11);
+out[0] = x12; // zero exactly when the low 32 bits of every input are zero
+}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e158m15/fenz.h b/src/Specific/montgomery32_2e158m15/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // out: 1 word, the OR of all five inputs as computed in fenz.c
diff --git a/src/Specific/montgomery32_2e158m15/feopp.c b/src/Specific/montgomery32_2e158m15/feopp.c
new file mode 100644
index 000000000..ff0b2a1b3
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // computes 0 - (x7,x8,x6,x4,x2) over 5x32-bit limbs (x2 least significant), then adds back the masked constant below
+{ uint32_t x10; uint8_t x11 = _subborrow_u32(0x0, 0x0, x2, &x10); // subtract from zero limb by limb, lowest first, chaining the borrow
+{ uint32_t x13; uint8_t x14 = _subborrow_u32(x11, 0x0, x4, &x13);
+{ uint32_t x16; uint8_t x17 = _subborrow_u32(x14, 0x0, x6, &x16);
+{ uint32_t x19; uint8_t x20 = _subborrow_u32(x17, 0x0, x8, &x19);
+{ uint32_t x22; uint8_t x23 = _subborrow_u32(x20, 0x0, x7, &x22); // x23 = final borrow
+{ uint32_t x24 = (uint32_t)cmovznz(x23, 0x0, 0xffffffff); // NOTE(review): presumably an all-ones mask when x23 != 0 and zero otherwise; cmovznz is defined outside this file - confirm
+{ uint32_t x25 = (x24 & 0xfffffff1); // masked limbs of 0x3fffffff_ffffffff_ffffffff_ffffffff_fffffff1 (2^158 - 15)
+{ uint32_t x27; uint8_t x28 = _addcarryx_u32(0x0, x10, x25, &x27);
+{ uint32_t x29 = (x24 & 0xffffffff);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x28, x13, x29, &x31);
+{ uint32_t x33 = (x24 & 0xffffffff);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x32, x16, x33, &x35);
+{ uint32_t x37 = (x24 & 0xffffffff);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x36, x19, x37, &x39);
+{ uint32_t x41 = (x24 & 0x3fffffff);
+{ uint32_t x43; uint8_t _ = _addcarryx_u32(x40, x22, x41, &x43); // carry out of the top limb is discarded
+out[0] = x43; // out[0] holds the most significant limb
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // out[4] holds the least significant limb
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e158m15/feopp.h b/src/Specific/montgomery32_2e158m15/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // out: 5 words; in feopp.c x2 is the least significant input limb
diff --git a/src/Specific/montgomery32_2e158m15/fesub.c b/src/Specific/montgomery32_2e158m15/fesub.c
new file mode 100644
index 000000000..4234a2422
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // 5x32-bit limb subtraction (x10,x11,x9,x7,x5) - (x18,x19,x17,x15,x13), x5/x13 least significant, then adds back the masked constant below
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(0x0, x5, x13, &x21); // subtract limb by limb, lowest first, chaining the borrow
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, x7, x15, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, x9, x17, &x27);
+{ uint32_t x30; uint8_t x31 = _subborrow_u32(x28, x11, x19, &x30);
+{ uint32_t x33; uint8_t x34 = _subborrow_u32(x31, x10, x18, &x33); // x34 = final borrow
+{ uint32_t x35 = (uint32_t)cmovznz(x34, 0x0, 0xffffffff); // NOTE(review): presumably an all-ones mask when x34 != 0 and zero otherwise; cmovznz is defined outside this file - confirm
+{ uint32_t x36 = (x35 & 0xfffffff1); // masked limbs of 0x3fffffff_ffffffff_ffffffff_ffffffff_fffffff1 (2^158 - 15)
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(0x0, x21, x36, &x38);
+{ uint32_t x40 = (x35 & 0xffffffff);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x39, x24, x40, &x42);
+{ uint32_t x44 = (x35 & 0xffffffff);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x43, x27, x44, &x46);
+{ uint32_t x48 = (x35 & 0xffffffff);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x47, x30, x48, &x50);
+{ uint32_t x52 = (x35 & 0x3fffffff);
+{ uint32_t x54; uint8_t _ = _addcarryx_u32(x51, x33, x52, &x54); // carry out of the top limb is discarded
+out[0] = x54; // out[0] holds the most significant limb
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38; // out[4] holds the least significant limb
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery32_2e158m15/fesub.h b/src/Specific/montgomery32_2e158m15/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery32_2e158m15/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // out: 5 words; in fesub.c the second group of five is subtracted from the first, x5 and x13 being the least significant limbs
diff --git a/src/Specific/montgomery32_2e165m25/feadd.c b/src/Specific/montgomery32_2e165m25/feadd.c
new file mode 100644
index 000000000..395c57b9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // 6x32-bit limb addition of (x12,x13,x11,x9,x7,x5) and (x22,x23,x21,x19,x17,x15); x5/x15 are the least significant limbs
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // add limb by limb, lowest first, chaining the carry
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); // x41 = carry out of the top limb
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffe7, &x43); // trial subtraction of 0x1f_ffffffff_ffffffff_ffffffff_ffffffff_ffffffe7 (2^165 - 25)
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x1f, &x58);
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // x62 = final borrow once the carry x41 is included; the difference word is discarded
+{ uint32_t x63 = cmovznz(x62, x58, x40); // NOTE(review): cmovznz is not defined in this file (liblow.h?); presumably yields the 2nd argument when x62 == 0 and the 3rd otherwise - confirm
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] holds the most significant limb
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // out[5] holds the least significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e165m25/feadd.h b/src/Specific/montgomery32_2e165m25/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // out: 6 words; in feadd.c x5 and x15 are the least significant limbs of the two operands
diff --git a/src/Specific/montgomery32_2e165m25/femul.c b/src/Specific/montgomery32_2e165m25/femul.c
new file mode 100644
index 000000000..5d56cb96e
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/femul.c
@@ -0,0 +1,85 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// Number of 32-bit limbs in one operand of femul below.
+enum { FEMUL_LIMBS = 6 };
+
+// Writes the (FEMUL_LIMBS + 1)-limb product k * v to row, least significant
+// limb first: each high half is chained into the next low half with carry.
+static inline void femul_scale(uint32_t* row, uint32_t k, const uint32_t* v)
+{
+  uint32_t lo, hi, prev;
+  uint8_t c = 0;
+  row[0] = _mulx_u32(k, v[0], &prev);
+  for (int j = 1; j < FEMUL_LIMBS; j++) {
+    lo = _mulx_u32(k, v[j], &hi);
+    c = _addcarryx_u32(c, prev, lo, &row[j]);
+    prev = hi;
+  }
+  // Top limb: last high half plus the pending carry (cannot overflow).
+  (void)_addcarryx_u32(0x0, c, prev, &row[FEMUL_LIMBS]);
+}
+
+// Montgomery multiplication with 32-bit limbs, modulus
+// 0x1f_ffffffff_ffffffff_ffffffff_ffffffff_ffffffe7 (2^165 - 25).
+// First operand: (x12, x13, x11, x9, x7, x5), x5 least significant.
+// Second operand: (x22, x23, x21, x19, x17, x15), x15 least significant.
+// out[0..5] receives the result with the most significant limb in out[0].
+// The generator had printed an unfinished term ("Op Syntax.MulSplit ...")
+// here instead of C. This body performs the multiply/reduce rounds that the
+// emitted prefix started (constants 0xc28f5c29, 0xffffffe7, 0xffffffff),
+// written as fixed-count loops with no data-dependent branches.
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{
+  const uint32_t a[FEMUL_LIMBS] = { (uint32_t)x5, (uint32_t)x7, (uint32_t)x9, (uint32_t)x11, (uint32_t)x13, (uint32_t)x12 };
+  const uint32_t b[FEMUL_LIMBS] = { (uint32_t)x15, (uint32_t)x17, (uint32_t)x19, (uint32_t)x21, (uint32_t)x23, (uint32_t)x22 };
+  const uint32_t m[FEMUL_LIMBS] = { 0xffffffe7, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x1f };
+  uint32_t r[FEMUL_LIMBS] = { 0 }; // running value, least significant limb first
+  uint32_t top = 0;                // word above r[FEMUL_LIMBS - 1]
+  for (int i = 0; i < FEMUL_LIMBS; i++) {
+    uint32_t row[FEMUL_LIMBS + 1], s[FEMUL_LIMBS + 1], q, discard;
+    uint8_t c = 0, c1;
+    // s = (top : r) + a[i] * b; c1 is the carry out of the top word.
+    femul_scale(row, a[i], b);
+    for (int j = 0; j < FEMUL_LIMBS; j++) {
+      c = _addcarryx_u32(c, r[j], row[j], &s[j]);
+    }
+    c1 = _addcarryx_u32(c, top, row[FEMUL_LIMBS], &s[FEMUL_LIMBS]);
+    // 0xc28f5c29 * 0xffffffe7 == -1 (mod 2^32), so s + q * m has a zero low limb.
+    q = _mulx_u32(s[0], 0xc28f5c29, &discard);
+    femul_scale(row, q, m);
+    // (top : r) = (s + q * m) >> 32; only the carry of the low limb is kept.
+    c = _addcarryx_u32(0x0, s[0], row[0], &discard);
+    for (int j = 1; j <= FEMUL_LIMBS; j++) {
+      c = _addcarryx_u32(c, s[j], row[j], &r[j - 1]);
+    }
+    top = (uint32_t)c + c1;
+  }
+  // Subtract the modulus once; keep the difference unless it borrowed.
+  uint32_t d[FEMUL_LIMBS], ignored;
+  uint8_t bw = 0;
+  for (int j = 0; j < FEMUL_LIMBS; j++) {
+    bw = _subborrow_u32(bw, r[j], m[j], &d[j]);
+  }
+  bw = _subborrow_u32(bw, top, 0x0, &ignored);
+  for (int j = 0; j < FEMUL_LIMBS; j++) {
+    // NOTE(review): assumes cmovznz(t, z, nz) yields z when t == 0, else nz - confirm in liblow.h.
+    out[j] = (uint32_t)cmovznz(bw, d[FEMUL_LIMBS - 1 - j], r[FEMUL_LIMBS - 1 - j]);
+  }
+}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e165m25/femul.h b/src/Specific/montgomery32_2e165m25/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // two groups of six operand words; femul.c starts its products from x5 and x15
diff --git a/src/Specific/montgomery32_2e165m25/fenz.c b/src/Specific/montgomery32_2e165m25/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the six input words together into a single word
+{ uint32_t x11 = (x10 | x9); // stored as uint32_t, so only the low 32 bits of each input contribute
+{ uint32_t x12 = (x8 | x11);
+{ uint32_t x13 = (x6 | x12);
+{ uint32_t x14 = (x4 | x13);
+{ uint32_t x15 = (x2 | x14);
+out[0] = x15; // zero exactly when the low 32 bits of every input are zero
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e165m25/fenz.h b/src/Specific/montgomery32_2e165m25/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // out: 1 word, the OR of all six inputs as computed in fenz.c
diff --git a/src/Specific/montgomery32_2e165m25/feopp.c b/src/Specific/montgomery32_2e165m25/feopp.c
new file mode 100644
index 000000000..22ff19e12
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // computes 0 - (x9,x10,x8,x6,x4,x2) over 6x32-bit limbs (x2 least significant), then adds back the masked constant below
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); // subtract from zero limb by limb, lowest first, chaining the borrow
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); // x28 = final borrow
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); // NOTE(review): presumably an all-ones mask when x28 != 0 and zero otherwise; cmovznz is defined outside this file - confirm
+{ uint32_t x30 = (x29 & 0xffffffe7); // masked limbs of 0x1f_ffffffff_ffffffff_ffffffff_ffffffff_ffffffe7 (2^165 - 25)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xffffffff);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint8_t x50 = ((uint8_t)x29 & 0x1f); // top constant limb 0x1f fits in 8 bits, so the mask is narrowed first
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); // carry out of the top limb is discarded
+out[0] = x52; // out[0] holds the most significant limb
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // out[5] holds the least significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e165m25/feopp.h b/src/Specific/montgomery32_2e165m25/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // out: 6 words; in feopp.c x2 is the least significant input limb
diff --git a/src/Specific/montgomery32_2e165m25/fesub.c b/src/Specific/montgomery32_2e165m25/fesub.c
new file mode 100644
index 000000000..2c2f9dadc
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // 6x32-bit limb subtraction (x12,x13,x11,x9,x7,x5) - (x22,x23,x21,x19,x17,x15), x5/x15 least significant, then adds back the masked constant below
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // subtract limb by limb, lowest first, chaining the borrow
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); // x41 = final borrow
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // NOTE(review): presumably an all-ones mask when x41 != 0 and zero otherwise; cmovznz is defined outside this file - confirm
+{ uint32_t x43 = (x42 & 0xffffffe7); // masked limbs of 0x1f_ffffffff_ffffffff_ffffffff_ffffffff_ffffffe7 (2^165 - 25)
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xffffffff);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint8_t x63 = ((uint8_t)x42 & 0x1f); // top constant limb 0x1f fits in 8 bits, so the mask is narrowed first
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // carry out of the top limb is discarded
+out[0] = x65; // out[0] holds the most significant limb
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // out[5] holds the least significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e165m25/fesub.h b/src/Specific/montgomery32_2e165m25/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e165m25/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // out: 6 words; in fesub.c the second group of six is subtracted from the first, x5 and x15 being the least significant limbs
diff --git a/src/Specific/montgomery32_2e166m5/feadd.c b/src/Specific/montgomery32_2e166m5/feadd.c
new file mode 100644
index 000000000..4d85b9fc4
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Computes a + b on six 32-bit limbs, a = (x12,x13,x11,x9,x7,x5) and b = (x22,x23,x21,x19,x17,x15), most significant limb first, then trial-subtracts 2^166 - 5 and keeps one of the two candidates. Arguments are narrowed to the 32-bit operands of the intrinsics.
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // least significant limb, no incoming carry
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); // most significant limb; x41 is the carry out of the whole sum
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffffb, &x43); // trial subtraction starts; 0xfffffffb = 2^32 - 5 is the lowest limb of the subtrahend
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3f, &x58); // highest limb 0x3f; the six limbs encode 0x40 * 2^160 - 5 = 2^166 - 5
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // the sum's carry-out absorbs the borrow; only the final borrow x62 is kept
+{ uint32_t x63 = cmovznz(x62, x58, x40); // per-limb choice between the subtracted value (second argument) and the plain sum (third argument), keyed on x62; presumably x62 == 0 picks the second -- cmovznz comes from liblow.h (not shown here), confirm there
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // most significant limb
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // least significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; each slot receives one 32-bit limb, out[0] being the most significant.
diff --git a/src/Specific/montgomery32_2e166m5/feadd.h b/src/Specific/montgomery32_2e166m5/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // out <- a + b (with a trial subtraction of 2^166 - 5), a = (x12,x13,x11,x9,x7,x5), b = (x22,x23,x21,x19,x17,x15), one 32-bit limb per argument, most significant first; out must hold 6 entries.
diff --git a/src/Specific/montgomery32_2e166m5/femul.c b/src/Specific/montgomery32_2e166m5/femul.c
new file mode 100644
index 000000000..794cb8b04
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Word-by-word Montgomery multiplication for p = 2^166 - 5: six rounds, each adding (one limb of a) * b plus a multiple of p and dropping the zeroed low limb, i.e. a*b/2^192 mod p. a = (x12,x13,x11,x9,x7,x5), b = (x22,x23,x21,x19,x17,x15), 32-bit limbs, most significant first. The generated file stopped at the x61 * 0x3f product; the remainder follows the same schedule as the other 6-limb montgomery32 multipliers, with every high-half temporary typed uint32_t as _mulx_u32 requires.
+{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); // round 1: products of a's lowest limb x5 with each limb of b (return value = low half, pointer = high half)
+{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); // join the partial products: each high half is added to the next low half
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xcccccccd, &_); // reduction factor: 0xcccccccd * 5 == 1 (mod 2^32), i.e. 0xcccccccd = -1/p mod 2^32
+{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffffb, &x65); // x61 * p, limb by limb (p = 0xfffffffb, 0xffffffff x4, 0x3f from least to most significant)
+{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3f, &x80); // the product the generator failed to print
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_); // the low limb cancels to zero by the choice of x61, so only its carry is kept
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122); // round 2: limb x7 of a
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157); // add this round's product to the running total
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xcccccccd, &_);
+{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xfffffffb, &x182);
+{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3f, &x197);
+{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // merge the two carry-outs into the accumulator's extra top limb
+{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240); // round 3: limb x9 of a
+{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xcccccccd, &_);
+{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xfffffffb, &x300);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3f, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358); // round 4: limb x11 of a
+{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xcccccccd, &_);
+{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xfffffffb, &x418);
+{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3f, &x433);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476); // round 5: limb x13 of a
+{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xcccccccd, &_);
+{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xfffffffb, &x536);
+{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3f, &x551);
+{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594); // round 6: highest limb x12 of a
+{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xcccccccd, &_);
+{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xfffffffb, &x654);
+{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3f, &x669);
+{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648);
+{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xfffffffb, &x711); // final step: trial subtraction of p from the accumulator
+{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3f, &x726);
+{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_); // the extra top limb absorbs the borrow; x730 is the final borrow
+{ uint32_t x731 = cmovznz(x730, x726, x707); // per-limb choice between the subtracted value and the accumulator, keyed on x730 (cmovznz is provided by liblow.h; presumably x730 == 0 picks the second argument)
+{ uint32_t x732 = cmovznz(x730, x723, x704);
+{ uint32_t x733 = cmovznz(x730, x720, x701);
+{ uint32_t x734 = cmovznz(x730, x717, x698);
+{ uint32_t x735 = cmovznz(x730, x714, x695);
+{ uint32_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // most significant limb
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736; // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e166m5/femul.h b/src/Specific/montgomery32_2e166m5/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Takes two six-limb operands, (x12,x13,x11,x9,x7,x5) and (x22,x23,x21,x19,x17,x15), one 32-bit limb per argument, and writes result limbs through out; presumably the Montgomery product modulo 2^166 - 5 -- confirm against femul.c.
diff --git a/src/Specific/montgomery32_2e166m5/fenz.c b/src/Specific/montgomery32_2e166m5/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  // Nonzero test: OR all six limbs together and keep only the low 32 bits,
+  // so out[0] is zero exactly when the low 32 bits of every limb are zero.
+  const uint64_t any_bits = x2 | x4 | x6 | x8 | x10 | x9;
+  const uint32_t folded = (uint32_t)any_bits;
+  out[0] = folded;
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e166m5/fenz.h b/src/Specific/montgomery32_2e166m5/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // out[0] <- bitwise OR of the six limbs (low 32 bits); zero only when all of them are zero. out must hold 1 entry.
diff --git a/src/Specific/montgomery32_2e166m5/feopp.c b/src/Specific/montgomery32_2e166m5/feopp.c
new file mode 100644
index 000000000..38900e439
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 - a on six 32-bit limbs, a = (x9,x10,x8,x6,x4,x2), most significant limb first; if the subtraction borrows, the constant 2^166 - 5 is added back. Arguments are narrowed to the 32-bit operands of the intrinsics.
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); // least significant limb: 0 - x2, no incoming borrow
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); // most significant limb; x28 is the final borrow
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); // mask: presumably 0 when x28 == 0 and 0xffffffff otherwise -- cmovznz comes from liblow.h (not shown here), confirm its argument order there
+{ uint32_t x30 = (x29 & 0xfffffffb); // lowest limb of the value to add back: 0xfffffffb = 2^32 - 5
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xffffffff);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint8_t x50 = ((uint8_t)x29 & 0x3f); // highest limb: 0x3f; together the six limbs encode 0x40 * 2^160 - 5 = 2^166 - 5
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); // the carry out of the top limb is discarded
+out[0] = x52; // most significant limb
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // least significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; each slot receives one 32-bit limb, out[0] being the most significant.
diff --git a/src/Specific/montgomery32_2e166m5/feopp.h b/src/Specific/montgomery32_2e166m5/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // out <- 0 - a with a = (x9,x10,x8,x6,x4,x2), one 32-bit limb per argument, most significant first; out must hold 6 entries.
diff --git a/src/Specific/montgomery32_2e166m5/fesub.c b/src/Specific/montgomery32_2e166m5/fesub.c
new file mode 100644
index 000000000..9b4fcba2d
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Computes a - b on six 32-bit limbs, a = (x12,x13,x11,x9,x7,x5) and b = (x22,x23,x21,x19,x17,x15), most significant limb first; if the subtraction borrows, the constant 2^166 - 5 is added back. Arguments are narrowed to the 32-bit operands of the intrinsics.
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // least significant limb, no incoming borrow
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); // most significant limb; x41 is the final borrow of a - b
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // mask: presumably 0 when x41 == 0 and 0xffffffff otherwise -- cmovznz comes from liblow.h (not shown here), confirm its argument order there
+{ uint32_t x43 = (x42 & 0xfffffffb); // lowest limb of the value to add back: 0xfffffffb = 2^32 - 5
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xffffffff);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint8_t x63 = ((uint8_t)x42 & 0x3f); // highest limb: 0x3f; together the six limbs encode 0x40 * 2^160 - 5 = 2^166 - 5
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // the carry out of the top limb is discarded
+out[0] = x65; // most significant limb
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // least significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; each slot receives one 32-bit limb, out[0] being the most significant.
diff --git a/src/Specific/montgomery32_2e166m5/fesub.h b/src/Specific/montgomery32_2e166m5/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e166m5/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // out <- a - b with a = (x12,x13,x11,x9,x7,x5), b = (x22,x23,x21,x19,x17,x15), one 32-bit limb per argument, most significant first; out must hold 6 entries.
diff --git a/src/Specific/montgomery32_2e171m19/feadd.c b/src/Specific/montgomery32_2e171m19/feadd.c
new file mode 100644
index 000000000..6d641caaf
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Computes a + b on six 32-bit limbs, a = (x12,x13,x11,x9,x7,x5) and b = (x22,x23,x21,x19,x17,x15), most significant limb first, then trial-subtracts 2^171 - 19 and keeps one of the two candidates. Arguments are narrowed to the 32-bit operands of the intrinsics.
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // least significant limb, no incoming carry
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); // most significant limb; x41 is the carry out of the whole sum
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffed, &x43); // trial subtraction starts; 0xffffffed = 2^32 - 19 is the lowest limb of the subtrahend
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x7ff, &x58); // highest limb 0x7ff; the six limbs encode 0x800 * 2^160 - 19 = 2^171 - 19
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // the sum's carry-out absorbs the borrow; only the final borrow x62 is kept
+{ uint32_t x63 = cmovznz(x62, x58, x40); // per-limb choice between the subtracted value (second argument) and the plain sum (third argument), keyed on x62; presumably x62 == 0 picks the second -- cmovznz comes from liblow.h (not shown here), confirm there
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // most significant limb
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // least significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; each slot receives one 32-bit limb, out[0] being the most significant.
diff --git a/src/Specific/montgomery32_2e171m19/feadd.h b/src/Specific/montgomery32_2e171m19/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // out <- a + b (with a trial subtraction of 2^171 - 19), a = (x12,x13,x11,x9,x7,x5), b = (x22,x23,x21,x19,x17,x15), one 32-bit limb per argument, most significant first; out must hold 6 entries.
diff --git a/src/Specific/montgomery32_2e171m19/femul.c b/src/Specific/montgomery32_2e171m19/femul.c
new file mode 100644
index 000000000..c8fe55921
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Word-by-word Montgomery multiplication for p = 2^171 - 19: six rounds, each adding (one limb of a) * b plus a multiple of p and dropping the zeroed low limb, i.e. a*b/2^192 mod p. a = (x12,x13,x11,x9,x7,x5), b = (x22,x23,x21,x19,x17,x15), 32-bit limbs, most significant first. Arguments are narrowed to the 32-bit operands of the intrinsics.
+{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); // round 1: products of a's lowest limb x5 with each limb of b (return value = low half, pointer = high half)
+{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); // join the partial products: each high half is added to the next low half
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0x286bca1b, &_); // reduction factor: 0x286bca1b * 19 == 1 (mod 2^32), i.e. 0x286bca1b = -1/p mod 2^32
+{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffed, &x65); // x61 * p, limb by limb (p = 0xffffffed, 0xffffffff x4, 0x7ff from least to most significant)
+{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x7ff, &x80);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_); // the low limb cancels to zero by the choice of x61, so only its carry is kept
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122); // round 2: limb x7 of a
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157); // add this round's product to the running total
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0x286bca1b, &_);
+{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffed, &x182);
+{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x7ff, &x197);
+{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // merge the two carry-outs into the accumulator's extra top limb
+{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240); // round 3: limb x9 of a
+{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0x286bca1b, &_);
+{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffed, &x300);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x7ff, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358); // round 4: limb x11 of a
+{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0x286bca1b, &_);
+{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffed, &x418);
+{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x7ff, &x433);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476); // round 5: limb x13 of a
+{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0x286bca1b, &_);
+{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffed, &x536);
+{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x7ff, &x551);
+{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594); // round 6: highest limb x12 of a
+{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0x286bca1b, &_);
+{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffed, &x654);
+{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x7ff, &x669);
+{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648);
+{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffed, &x711); // final step: trial subtraction of p from the accumulator
+{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x7ff, &x726);
+{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_); // the extra top limb absorbs the borrow; x730 is the final borrow
+{ uint32_t x731 = cmovznz(x730, x726, x707); // per-limb choice between the subtracted value (second argument) and the accumulator (third argument), keyed on x730; presumably x730 == 0 picks the second -- cmovznz comes from liblow.h (not shown here), confirm there
+{ uint32_t x732 = cmovznz(x730, x723, x704);
+{ uint32_t x733 = cmovznz(x730, x720, x701);
+{ uint32_t x734 = cmovznz(x730, x717, x698);
+{ uint32_t x735 = cmovznz(x730, x714, x695);
+{ uint32_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // most significant limb
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736; // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; each slot receives one 32-bit limb, out[0] being the most significant.
diff --git a/src/Specific/montgomery32_2e171m19/femul.h b/src/Specific/montgomery32_2e171m19/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery32_2e171m19/fenz.c b/src/Specific/montgomery32_2e171m19/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // ORs the six limb arguments together and stores the result in out[0]
+{ uint32_t x11 = (x10 | x9);  // each intermediate is narrowed to uint32_t, so only the low 32 bits of the arguments contribute
+{ uint32_t x12 = (x8 | x11);
+{ uint32_t x13 = (x6 | x12);
+{ uint32_t x14 = (x4 | x13);
+{ uint32_t x15 = (x2 | x14);  // x15 is nonzero iff some argument has a nonzero bit in its low 32 bits
+out[0] = x15;
+}}}}}
+// caller: uint64_t out[1];  (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e171m19/fenz.h b/src/Specific/montgomery32_2e171m19/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e171m19/feopp.c b/src/Specific/montgomery32_2e171m19/feopp.c
new file mode 100644
index 000000000..79748f21c
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // limb-wise negation: computes 0 - (x2, x4, x6, x8, x10, x9), then adds back the masked modulus limbs; writes out[0..5]
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);  // start of the borrow chain: x2 is the lowest limb
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);  // x28 = final borrow out of the top limb
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);  // mask derived from x28; cmovznz is not defined in this file (presumably liblow.h) - presumably yields 0xffffffff when x28 != 0, TODO confirm
+{ uint32_t x30 = (x29 & 0xffffffed);  // masked lowest modulus limb (0xffffffed == 2^32 - 19)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);  // start of the carry chain that adds the masked modulus
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xffffffff);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint32_t x50 = (x29 & 0x7ff);  // masked top modulus limb (0x7ff == 2^11 - 1)
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);  // carry out of the top limb is discarded
+out[0] = x52;  // results are stored highest limb first
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];  (all six entries are written; out[0] is the top limb)
diff --git a/src/Specific/montgomery32_2e171m19/feopp.h b/src/Specific/montgomery32_2e171m19/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e171m19/fesub.c b/src/Specific/montgomery32_2e171m19/fesub.c
new file mode 100644
index 000000000..c47be818c
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)  // limb-wise subtraction (x5, x7, x9, x11, x13, x12) - (x15, x17, x19, x21, x23, x22), then adds back the masked modulus limbs; writes out[0..5]
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25);  // start of the borrow chain: x5 / x15 are the lowest limbs
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);  // x41 = final borrow out of the top limb
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff);  // mask derived from x41; cmovznz is not defined in this file (presumably liblow.h) - presumably yields 0xffffffff when x41 != 0, TODO confirm
+{ uint32_t x43 = (x42 & 0xffffffed);  // masked lowest modulus limb (0xffffffed == 2^32 - 19)
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);  // start of the carry chain that adds the masked modulus
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xffffffff);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint32_t x63 = (x42 & 0x7ff);  // masked top modulus limb (0x7ff == 2^11 - 1)
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65);  // carry out of the top limb is discarded
+out[0] = x65;  // results are stored highest limb first
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];  (all six entries are written; out[0] is the top limb)
diff --git a/src/Specific/montgomery32_2e171m19/fesub.h b/src/Specific/montgomery32_2e171m19/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e171m19/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery32_2e174m17/feadd.c b/src/Specific/montgomery32_2e174m17/feadd.c
new file mode 100644
index 000000000..0190a5e63
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)  // limb-wise addition (x5, x7, x9, x11, x13, x12) + (x15, x17, x19, x21, x23, x22), followed by a trial subtraction of the modulus limbs and a per-limb select; writes out[0..5]
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25);  // start of the carry chain: x5 / x15 are the lowest limbs
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);  // x41 = carry out of the top limb
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffef, &x43);  // trial subtraction of the modulus limbs starts here (0xffffffef == 2^32 - 17)
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fff, &x58);  // top modulus limb (0x3fff == 2^14 - 1)
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_);  // borrow is propagated through the addition's carry x41; only the final borrow x62 is kept
+{ uint32_t x63 = cmovznz(x62, x58, x40);  // per-limb select on x62; cmovznz is not defined in this file (presumably liblow.h) - presumably picks the subtracted limb when x62 == 0, TODO confirm
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63;  // results are stored highest limb first
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];  (all six entries are written; out[0] is the top limb)
diff --git a/src/Specific/montgomery32_2e174m17/feadd.h b/src/Specific/montgomery32_2e174m17/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery32_2e174m17/femul.c b/src/Specific/montgomery32_2e174m17/femul.c
new file mode 100644
index 000000000..7b1f772c8
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)  // word-by-word multiplication of (x5, x7, x9, x11, x13, x12) by (x15, x17, x19, x21, x23, x22) with a reduction step interleaved after every limb, then one trial subtraction of the modulus; writes out[0..5]. Presumably a Montgomery product (a*b*2^-192 mod 2^174 - 17) - TODO confirm against the generator
+{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26);  // round 1: x5 times each limb of the second operand (assuming the standard intrinsic: low word returned, high word stored through the pointer)
+{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43);  // carry chain: each high word is added to the next product's low word
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);  // top word: carry x56 plus high word x41; the carry-out is discarded
+{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xf0f0f0f1, &_);  // reduction multiplier: low word of x25 * 0xf0f0f0f1 (17 * 0xf0f0f0f1 == 1 mod 2^32)
+{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffef, &x65);  // x61 times the modulus limbs (0xffffffef, 0xffffffff x4, 0x3fff)
+{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fff, &x80);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_);  // lowest word of the sum is discarded; only its carry x101 is kept
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122);  // round 2: x7 times each limb of the second operand
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157);  // add this round's product to the running value from round 1
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xf0f0f0f1, &_);  // reduction multiplier for round 2
+{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffef, &x182);
+{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fff, &x197);
+{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176);  // the two carry-outs of this round are summed into one extra top word
+{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240);  // round 3: x9 times each limb of the second operand
+{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xf0f0f0f1, &_);  // reduction multiplier for round 3
+{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffef, &x300);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fff, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358);  // round 4: x11 times each limb of the second operand
+{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xf0f0f0f1, &_);  // reduction multiplier for round 4
+{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffef, &x418);
+{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fff, &x433);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476);  // round 5: x13 times each limb of the second operand
+{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xf0f0f0f1, &_);  // reduction multiplier for round 5
+{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffef, &x536);
+{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fff, &x551);
+{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594);  // round 6: x12 times each limb of the second operand
+{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xf0f0f0f1, &_);  // reduction multiplier for round 6
+{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffef, &x654);
+{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fff, &x669);
+{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648);
+{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffef, &x711);  // trial subtraction of the modulus limbs from the six result words (0xffffffef == 2^32 - 17)
+{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fff, &x726);  // top modulus limb (0x3fff == 2^14 - 1)
+{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_);  // borrow is propagated through the extra carry word x709; only the final borrow x730 is kept
+{ uint32_t x731 = cmovznz(x730, x726, x707);  // per-limb select on x730; cmovznz is not defined in this file (presumably liblow.h) - presumably picks the subtracted limb when x730 == 0, TODO confirm
+{ uint32_t x732 = cmovznz(x730, x723, x704);
+{ uint32_t x733 = cmovznz(x730, x720, x701);
+{ uint32_t x734 = cmovznz(x730, x717, x698);
+{ uint32_t x735 = cmovznz(x730, x714, x695);
+{ uint32_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731;  // results are stored highest limb first
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];  (all six entries are written; out[0] is the top limb)
diff --git a/src/Specific/montgomery32_2e174m17/femul.h b/src/Specific/montgomery32_2e174m17/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery32_2e174m17/fenz.c b/src/Specific/montgomery32_2e174m17/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // ORs the six limb arguments together and stores the result in out[0]
+{ uint32_t x11 = (x10 | x9);  // each intermediate is narrowed to uint32_t, so only the low 32 bits of the arguments contribute
+{ uint32_t x12 = (x8 | x11);
+{ uint32_t x13 = (x6 | x12);
+{ uint32_t x14 = (x4 | x13);
+{ uint32_t x15 = (x2 | x14);  // x15 is nonzero iff some argument has a nonzero bit in its low 32 bits
+out[0] = x15;
+}}}}}
+// caller: uint64_t out[1];  (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e174m17/fenz.h b/src/Specific/montgomery32_2e174m17/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e174m17/feopp.c b/src/Specific/montgomery32_2e174m17/feopp.c
new file mode 100644
index 000000000..a5c384418
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // limb-wise negation: computes 0 - (x2, x4, x6, x8, x10, x9), then adds back the masked modulus limbs; writes out[0..5]
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12);  // start of the borrow chain: x2 is the lowest limb
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);  // x28 = final borrow out of the top limb
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff);  // mask derived from x28; cmovznz is not defined in this file (presumably liblow.h) - presumably yields 0xffffffff when x28 != 0, TODO confirm
+{ uint32_t x30 = (x29 & 0xffffffef);  // masked lowest modulus limb (0xffffffef == 2^32 - 17)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);  // start of the carry chain that adds the masked modulus
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xffffffff);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint32_t x50 = (x29 & 0x3fff);  // masked top modulus limb (0x3fff == 2^14 - 1)
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52);  // carry out of the top limb is discarded
+out[0] = x52;  // results are stored highest limb first
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];  (all six entries are written; out[0] is the top limb)
diff --git a/src/Specific/montgomery32_2e174m17/feopp.h b/src/Specific/montgomery32_2e174m17/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for feopp; the definition in this directory's feopp.c stores out[0]..out[5], so out must have room for six uint64_t.
diff --git a/src/Specific/montgomery32_2e174m17/fesub.c b/src/Specific/montgomery32_2e174m17/fesub.c
new file mode 100644
index 000000000..1b2b1408e
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb subtraction (x5, x7, x9, x11, x13, x12) - (x15, x17, x19, x21, x23, x22), then adds a masked constant; stores six limbs in out[0..5].
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // borrow chain starts at x5 - x15 and ends at x12 - x22; the 32-bit intrinsic implies each argument carries a 32-bit limb (confirm with callers)
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); // x41 is the borrow out of the whole subtraction
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // mask selected by x41; cmovznz lives in liblow.h (not shown) - presumably 0 when x41 == 0, else 0xffffffff; confirm
+{ uint32_t x43 = (x42 & 0xffffffef); // the masked constants 0xffffffef, 0xffffffff (x4), 0x3fff are the limbs of 2^174 - 17, lowest first
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); // add the masked constant limb by limb, propagating the carry
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xffffffff);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint32_t x63 = (x42 & 0x3fff);
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // carry out of the last limb is discarded
+out[0] = x65; // out[0] receives the last limb of the chain (x65); out[5] receives the first (x45)
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e174m17/fesub.h b/src/Specific/montgomery32_2e174m17/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m17/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for fesub; the definition in this directory's fesub.c stores out[0]..out[5], so out must have room for six uint64_t.
diff --git a/src/Specific/montgomery32_2e174m3/feadd.c b/src/Specific/montgomery32_2e174m3/feadd.c
new file mode 100644
index 000000000..ba1890ee6
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb addition (x5, x7, x9, x11, x13, x12) + (x15, x17, x19, x21, x23, x22) followed by a trial subtraction of a constant and a per-limb select; stores six limbs in out[0..5].
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // carry chain starts at x5 + x15 and ends at x12 + x22; the 32-bit intrinsic implies each argument carries a 32-bit limb (confirm with callers)
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); // x41 is the carry out of the whole addition
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffffd, &x43); // trial subtraction of the limbs 0xfffffffd, 0xffffffff (x4), 0x3fff, i.e. 2^174 - 3, lowest first
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fff, &x58);
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // folds the addition's carry-out x41 into the final borrow x62; the difference word itself is discarded
+{ uint32_t x63 = cmovznz(x62, x58, x40); // per-limb select between the subtracted (x58..x43) and unsubtracted (x40..x25) value, keyed on x62; cmovznz lives in liblow.h (not shown) - presumably picks the second argument when x62 == 0; confirm
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] receives the limb from the end of the chain (x63); out[5] receives the first (x68)
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e174m3/feadd.h b/src/Specific/montgomery32_2e174m3/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for feadd; the definition in this directory's feadd.c stores out[0]..out[5], so out must have room for six uint64_t.
diff --git a/src/Specific/montgomery32_2e174m3/femul.c b/src/Specific/montgomery32_2e174m3/femul.c
new file mode 100644
index 000000000..ca3165c2d
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb multiply of (x5, x7, x9, x11, x13, x12) by (x15, x17, x19, x21, x23, x22) with an interleaved word-by-word reduction (Montgomery style, per the directory name); stores six limbs in out[0..5].
+{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); // round 1: x5 times each limb of the second operand; _mulx_u32 returns the low word and stores the high word through its pointer argument
+{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); // stitch each high word to the next low word, giving the 7-word product x25, x43, x46, x49, x52, x55, x58
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58);
+{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xaaaaaaab, &_); // reduction factor: low word of x25 * 0xaaaaaaab (high word discarded); 0xaaaaaaab * 0xfffffffd == -1 mod 2^32
+{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffffd, &x65); // x61 times the limbs 0xfffffffd, 0xffffffff (x4), 0x3fff, i.e. 2^174 - 3, lowest first
+{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fff, &x80);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_); // lowest word sums to 0 mod 2^32 by construction, so only its carry x101 is kept (this drops one word)
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122); // round 2: same pattern with x7
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157); // add this round's product to the running value left by the previous round
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xaaaaaaab, &_); // reduction factor for round 2
+{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xfffffffd, &x182);
+{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fff, &x197);
+{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // combine the two carry-outs of this round into the extra top word for the next round
+{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240); // round 3: same pattern with x9
+{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xaaaaaaab, &_); // reduction factor for round 3
+{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xfffffffd, &x300);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fff, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358); // round 4: same pattern with x11
+{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xaaaaaaab, &_); // reduction factor for round 4
+{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xfffffffd, &x418);
+{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fff, &x433);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476); // round 5: same pattern with x13
+{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xaaaaaaab, &_); // reduction factor for round 5
+{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xfffffffd, &x536);
+{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fff, &x551);
+{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594); // round 6 (last): same pattern with x12
+{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xaaaaaaab, &_); // reduction factor for round 6
+{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xfffffffd, &x654);
+{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fff, &x669);
+{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // top word of the accumulated value after the last round
+{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xfffffffd, &x711); // final trial subtraction of the limbs of 2^174 - 3 from (x692 .. x707, x709)
+{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fff, &x726);
+{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_); // x730 is the borrow out of the 7-word subtraction; the difference word is discarded
+{ uint32_t x731 = cmovznz(x730, x726, x707); // per-limb select between subtracted and unsubtracted value, keyed on x730; cmovznz lives in liblow.h (not shown) - presumably picks the second argument when x730 == 0; confirm
+{ uint32_t x732 = cmovznz(x730, x723, x704);
+{ uint32_t x733 = cmovznz(x730, x720, x701);
+{ uint32_t x734 = cmovznz(x730, x717, x698);
+{ uint32_t x735 = cmovznz(x730, x714, x695);
+{ uint32_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[0] receives the limb from the end of the chain (x731); out[5] receives the first (x736)
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e174m3/femul.h b/src/Specific/montgomery32_2e174m3/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for femul; the definition in this directory's femul.c stores out[0]..out[5], so out must have room for six uint64_t.
diff --git a/src/Specific/montgomery32_2e174m3/fenz.c b/src/Specific/montgomery32_2e174m3/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: ORs the six inputs together and stores the single result in out[0].
+{ uint32_t x11 = (x10 | x9); // each step is stored in a uint32_t, so only the low 32 bits of the inputs contribute
+{ uint32_t x12 = (x8 | x11);
+{ uint32_t x13 = (x6 | x12);
+{ uint32_t x14 = (x4 | x13);
+{ uint32_t x15 = (x2 | x14);
+out[0] = x15; // zero exactly when the low 32 bits of every input are zero
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e174m3/fenz.h b/src/Specific/montgomery32_2e174m3/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the definition in this directory's fenz.c stores only out[0], so out must have room for one uint64_t.
diff --git a/src/Specific/montgomery32_2e174m3/feopp.c b/src/Specific/montgomery32_2e174m3/feopp.c
new file mode 100644
index 000000000..fd49d0c28
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation over six limbs: computes 0 - (x2, x4, x6, x8, x10, x9), then adds a masked constant; stores six limbs in out[0..5].
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); // borrow chain starts at x2 and ends at x9; the 32-bit intrinsic implies each argument carries a 32-bit limb (confirm with callers)
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); // x28 is the borrow out of the whole subtraction
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); // mask selected by x28; cmovznz lives in liblow.h (not shown) - presumably 0 when x28 == 0, else 0xffffffff; confirm
+{ uint32_t x30 = (x29 & 0xfffffffd); // the masked constants 0xfffffffd, 0xffffffff (x4), 0x3fff are the limbs of 2^174 - 3, lowest first
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32); // add the masked constant limb by limb, propagating the carry
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xffffffff);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint32_t x50 = (x29 & 0x3fff);
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); // carry out of the last limb is discarded
+out[0] = x52; // out[0] receives the last limb of the chain (x52); out[5] receives the first (x32)
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e174m3/feopp.h b/src/Specific/montgomery32_2e174m3/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for feopp; the definition in this directory's feopp.c stores out[0]..out[5], so out must have room for six uint64_t.
diff --git a/src/Specific/montgomery32_2e174m3/fesub.c b/src/Specific/montgomery32_2e174m3/fesub.c
new file mode 100644
index 000000000..111a2f165
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb subtraction (x5, x7, x9, x11, x13, x12) - (x15, x17, x19, x21, x23, x22), then adds a masked constant; stores six limbs in out[0..5].
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // borrow chain starts at x5 - x15 and ends at x12 - x22; the 32-bit intrinsic implies each argument carries a 32-bit limb (confirm with callers)
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); // x41 is the borrow out of the whole subtraction
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // mask selected by x41; cmovznz lives in liblow.h (not shown) - presumably 0 when x41 == 0, else 0xffffffff; confirm
+{ uint32_t x43 = (x42 & 0xfffffffd); // the masked constants 0xfffffffd, 0xffffffff (x4), 0x3fff are the limbs of 2^174 - 3, lowest first
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); // add the masked constant limb by limb, propagating the carry
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xffffffff);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint32_t x63 = (x42 & 0x3fff);
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // carry out of the last limb is discarded
+out[0] = x65; // out[0] receives the last limb of the chain (x65); out[5] receives the first (x45)
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e174m3/fesub.h b/src/Specific/montgomery32_2e174m3/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e174m3/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for fesub; the definition in this directory's fesub.c stores out[0]..out[5], so out must have room for six uint64_t.
diff --git a/src/Specific/montgomery32_2e189m25/feadd.c b/src/Specific/montgomery32_2e189m25/feadd.c
new file mode 100644
index 000000000..c33b5cae7
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb addition (x12,x13,x11,x9,x7,x5) + (x22,x23,x21,x19,x17,x15), followed by a trial subtraction of 2^189 - 25 and a per-limb select; x5/x15 are the least-significant limbs.
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // carry chain starts at the lowest limb with carry-in 0; NOTE(review): arguments are uint64_t but used as 32-bit limbs - confirm callers never pass wider values
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); // x41: carry out of the top limb
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffe7, &x43); // trial subtraction of the constant 2^189 - 25, lowest limb (0xffffffe7) first
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x1fffffff, &x58); // top limb of the constant is 0x1fffffff
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // extends the borrow chain over the carry x41; only the final borrow x62 is kept
+{ uint32_t x63 = cmovznz(x62, x58, x40); // per-limb select between the subtracted and the plain sum; NOTE(review): cmovznz comes from liblow.h (not shown) - presumably picks the 2nd argument when x62 is zero, confirm
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // selected top (most-significant) limb
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // selected lowest (least-significant) limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; out[0] receives the most-significant limb, out[5] the least-significant
diff --git a/src/Specific/montgomery32_2e189m25/feadd.h b/src/Specific/montgomery32_2e189m25/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype of feadd (defined in feadd.c): writes six limbs to out; x12..x5 are the limbs of the first addend and x22..x15 those of the second.
diff --git a/src/Specific/montgomery32_2e189m25/femul.c b/src/Specific/montgomery32_2e189m25/femul.c
new file mode 100644
index 000000000..ce597761b
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Multiplies (x12,x13,x11,x9,x7,x5) by (x22,x23,x21,x19,x17,x15) in six 32-bit limbs, interleaving a word-by-word Montgomery reduction by p = 2^189 - 25 after each row; x5/x15 are the least-significant limbs.
+{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); // row 1: x5 times each limb of the second operand; the return value is the low half, the high half goes through the pointer
+{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); // fold each high half into the next low half to form the row's words
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58); // top word of the row: carry x56 added (as a value) to the last high half
+{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xc28f5c29, &_); // reduction factor: low 32 bits of x25 * 0xc28f5c29; 0xc28f5c29 * 25 == 1 (mod 2^32), i.e. it is -p^{-1} mod 2^32
+{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffe7, &x65); // x61 times each limb of p (0xffffffe7, four 0xffffffff limbs, 0x1fffffff)
+{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x1fffffff, &x80);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_); // low word of (row + x61*p) is discarded; only its carry x101 propagates
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122); // row 2: x7 times each limb of the second operand
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157); // add row 2 onto the running sum left by the previous reduction
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xc28f5c29, &_); // reduction factor for round 2
+{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffe7, &x182);
+{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x1fffffff, &x197);
+{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // sum of the carries from the reduction add and the row add; becomes the top word for the next round
+{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240); // row 3: x9 times each limb of the second operand
+{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xc28f5c29, &_); // reduction factor for round 3
+{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffe7, &x300);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x1fffffff, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358); // row 4: x11 times each limb of the second operand
+{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xc28f5c29, &_); // reduction factor for round 4
+{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffe7, &x418);
+{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x1fffffff, &x433);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476); // row 5: x13 times each limb of the second operand
+{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xc28f5c29, &_); // reduction factor for round 5
+{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffe7, &x536);
+{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x1fffffff, &x551);
+{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594); // row 6: x12 (top limb of the first operand) times each limb of the second operand
+{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xc28f5c29, &_); // reduction factor for round 6
+{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffe7, &x654);
+{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x1fffffff, &x669);
+{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // overflow word above the six result limbs
+{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffe7, &x711); // trial subtraction of p = 2^189 - 25 from the six-limb result, lowest limb first
+{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x1fffffff, &x726);
+{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_); // extends the borrow chain over the overflow word; only the final borrow x730 is kept
+{ uint32_t x731 = cmovznz(x730, x726, x707); // per-limb select between the subtracted and the unsubtracted value; NOTE(review): cmovznz comes from liblow.h (not shown) - presumably picks the 2nd argument when x730 is zero, confirm
+{ uint32_t x732 = cmovznz(x730, x723, x704);
+{ uint32_t x733 = cmovznz(x730, x720, x701);
+{ uint32_t x734 = cmovznz(x730, x717, x698);
+{ uint32_t x735 = cmovznz(x730, x714, x695);
+{ uint32_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // selected top (most-significant) limb
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736; // selected lowest (least-significant) limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; out[0] receives the most-significant limb, out[5] the least-significant
diff --git a/src/Specific/montgomery32_2e189m25/femul.h b/src/Specific/montgomery32_2e189m25/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype of femul (defined in femul.c): writes six limbs to out; x12..x5 are the limbs of the first factor and x22..x15 those of the second.
diff --git a/src/Specific/montgomery32_2e189m25/fenz.c b/src/Specific/montgomery32_2e189m25/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes to out[0] the bitwise OR of the six limb arguments, so out[0] is zero only when every limb's low 32 bits are zero.
+{ uint32_t x11 = (x10 | x9); // each step narrows to uint32_t, so only the low 32 bits of the uint64_t arguments contribute
+{ uint32_t x12 = (x8 | x11);
+{ uint32_t x13 = (x6 | x12);
+{ uint32_t x14 = (x4 | x13);
+{ uint32_t x15 = (x2 | x14);
+out[0] = x15; // nonzero exactly when some limb has a bit set in its low 32 bits
+}}}}}
+// caller: uint64_t out[1]; only out[0] is written
diff --git a/src/Specific/montgomery32_2e189m25/fenz.h b/src/Specific/montgomery32_2e189m25/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz (defined in fenz.c): writes one word to out[0]; x9..x2 are the six limbs of the value being tested.
diff --git a/src/Specific/montgomery32_2e189m25/feopp.c b/src/Specific/montgomery32_2e189m25/feopp.c
new file mode 100644
index 000000000..e9276231c
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Six-limb negation: computes 0 - (x9,x10,x8,x6,x4,x2), followed by a masked add of 2^189 - 25; x2 is the least-significant limb.
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); // borrow chain subtracting each limb from zero, lowest limb first; NOTE(review): arguments are uint64_t but used as 32-bit limbs - confirm callers never pass wider values
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27); // x28: borrow out of the top limb
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); // mask derived from the final borrow; NOTE(review): cmovznz comes from liblow.h (not shown) - presumably yields 0 when x28 is zero and 0xffffffff otherwise, confirm
+{ uint32_t x30 = (x29 & 0xffffffe7); // lowest limb of 2^189 - 25, kept only when the mask is set
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32); // carry chain adding the masked constant back onto the difference
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xffffffff);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint32_t x50 = (x29 & 0x1fffffff); // top limb: 0x1fffffff followed by four 0xffffffff limbs and 0xffffffe7 is 2^189 - 25
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); // final carry is deliberately dropped
+out[0] = x52; // result limb from the top of the carry chain (most significant)
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // result limb from the start of the carry chain (least significant)
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; out[0] receives the most-significant limb, out[5] the least-significant
diff --git a/src/Specific/montgomery32_2e189m25/feopp.h b/src/Specific/montgomery32_2e189m25/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of feopp (defined in feopp.c): writes six limbs to out; x9..x2 are the six limbs of the value being negated.
diff --git a/src/Specific/montgomery32_2e189m25/fesub.c b/src/Specific/montgomery32_2e189m25/fesub.c
new file mode 100644
index 000000000..80ae99e51
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb subtraction (x12,x13,x11,x9,x7,x5) - (x22,x23,x21,x19,x17,x15), followed by a masked add of 2^189 - 25; x5/x15 are the least-significant limbs.
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // borrow chain starts at the lowest limb with borrow-in 0; NOTE(review): arguments are uint64_t but used as 32-bit limbs - confirm callers never pass wider values
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40); // x41: borrow out of the top limb
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // mask derived from the final borrow; NOTE(review): cmovznz comes from liblow.h (not shown) - presumably yields 0 when x41 is zero and 0xffffffff otherwise, confirm
+{ uint32_t x43 = (x42 & 0xffffffe7); // lowest limb of 2^189 - 25, kept only when the mask is set
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); // carry chain adding the masked constant back onto the difference
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xffffffff);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint32_t x63 = (x42 & 0x1fffffff); // top limb: 0x1fffffff followed by four 0xffffffff limbs and 0xffffffe7 is 2^189 - 25
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // final carry is deliberately dropped
+out[0] = x65; // result limb from the top of the carry chain (most significant)
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // result limb from the start of the carry chain (least significant)
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; out[0] receives the most-significant limb, out[5] the least-significant
diff --git a/src/Specific/montgomery32_2e189m25/fesub.h b/src/Specific/montgomery32_2e189m25/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e189m25/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype of fesub (defined in fesub.c): writes six limbs to out; x12..x5 are the limbs of the minuend and x22..x15 those of the subtrahend.
diff --git a/src/Specific/montgomery32_2e190m11/feadd.c b/src/Specific/montgomery32_2e190m11/feadd.c
new file mode 100644
index 000000000..5b9de603a
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb addition (x12,x13,x11,x9,x7,x5) + (x22,x23,x21,x19,x17,x15), followed by a trial subtraction of 2^190 - 11 and a per-limb select; x5/x15 are the least-significant limbs.
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // carry chain starts at the lowest limb with carry-in 0; NOTE(review): arguments are uint64_t but used as 32-bit limbs - confirm callers never pass wider values
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); // x41: carry out of the top limb
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xfffffff5, &x43); // trial subtraction of the constant 2^190 - 11, lowest limb (0xfffffff5) first
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x3fffffff, &x58); // top limb of the constant is 0x3fffffff
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // extends the borrow chain over the carry x41; only the final borrow x62 is kept
+{ uint32_t x63 = cmovznz(x62, x58, x40); // per-limb select between the subtracted and the plain sum; NOTE(review): cmovznz comes from liblow.h (not shown) - presumably picks the 2nd argument when x62 is zero, confirm
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // selected top (most-significant) limb
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // selected lowest (least-significant) limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; out[0] receives the most-significant limb, out[5] the least-significant
diff --git a/src/Specific/montgomery32_2e190m11/feadd.h b/src/Specific/montgomery32_2e190m11/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype of feadd (defined in feadd.c): writes six limbs to out; x12..x5 are the limbs of the first addend and x22..x15 those of the second.
diff --git a/src/Specific/montgomery32_2e190m11/femul.c b/src/Specific/montgomery32_2e190m11/femul.c
new file mode 100644
index 000000000..5305f497b
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Word-by-word Montgomery multiplication of two six-limb operands modulo 2^190 - 11; x5 and x15 are the limbs consumed first, x12 and x22 the last. Six rounds (one per limb x5, x7, x9, x11, x13, x12), then one conditional subtraction.
+{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); // round 0: partial products x5 * (x15, x17, x19, x21, x23, x22); _mulx_u32 returns the low word and stores the high word through the pointer
+{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); // add each high word to the next low word, giving the 7-limb row x25, x43, x46, x49, x52, x55, x58
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58); // top limb = last high word + carry x56; the carry-out is discarded
+{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0xba2e8ba3, &_); // reduction factor: 0xba2e8ba3 * 11 == 2^35 + 1, so the constant is the inverse of 11 modulo 2^32; only the low word is kept
+{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xfffffff5, &x65); // x61 times the modulus, whose limbs 0xfffffff5, 0xffffffff (x4), 0x3fffffff spell 2^190 - 11
+{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x3fffffff, &x80);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_); // low limb: x64 == -x25 mod 2^32 by choice of x61, so the sum word is dropped and only the carry x101 is kept
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118);
+{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122); // round 1: partial products for limb x7
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139);
+{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157); // add this round's row to the running value left by the previous round
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0xba2e8ba3, &_); // reduction factor for round 1
+{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xfffffff5, &x182);
+{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x3fffffff, &x197);
+{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199);
+{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // sum of the two carry-outs from this round; it is the extra top word fed into the next round
+{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240); // round 2: partial products for limb x9
+{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257);
+{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275);
+{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0xba2e8ba3, &_); // reduction factor for round 2
+{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xfffffff5, &x300);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x3fffffff, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358); // round 3: partial products for limb x11
+{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375);
+{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393);
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0xba2e8ba3, &_); // reduction factor for round 3
+{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xfffffff5, &x418);
+{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x3fffffff, &x433);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435);
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_);
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476); // round 4: partial products for limb x13
+{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493);
+{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511);
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0xba2e8ba3, &_); // reduction factor for round 4
+{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xfffffff5, &x536);
+{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x3fffffff, &x551);
+{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553);
+{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_);
+{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594); // round 5 (last): partial products for limb x12
+{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611);
+{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629);
+{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0xba2e8ba3, &_); // reduction factor for round 5
+{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xfffffff5, &x654);
+{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x3fffffff, &x669);
+{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671);
+{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_);
+{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648);
+{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xfffffff5, &x711); // trial subtraction of the modulus 2^190 - 11 from the six result limbs x692..x707
+{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x3fffffff, &x726);
+{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_); // propagate the borrow through the overflow word x709; x730 is the final borrow
+{ uint32_t x731 = cmovznz(x730, x726, x707); // choose between the subtracted and unsubtracted limb based on x730; cmovznz comes from liblow.h (not shown), presumably yielding the last argument when x730 is nonzero - confirm
+{ uint32_t x732 = cmovznz(x730, x723, x704);
+{ uint32_t x733 = cmovznz(x730, x720, x701);
+{ uint32_t x734 = cmovznz(x730, x717, x698);
+{ uint32_t x735 = cmovznz(x730, x714, x695);
+{ uint32_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[0] holds the limb from the end of the carry chain (x707/x726), out[5] the one from its start (x692/x711)
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e190m11/femul.h b/src/Specific/montgomery32_2e190m11/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declares femul (body in femul.c): takes two six-limb operands, x12..x5 and x22..x15, and writes six result words to out[0..5].
diff --git a/src/Specific/montgomery32_2e190m11/fenz.c b/src/Specific/montgomery32_2e190m11/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: ORs the six inputs together and stores the combined word in out[0], which is 0 only when every input's low 32 bits are 0.
+{ uint32_t x11 = (x10 | x9); // the uint32_t destination keeps only the low 32 bits of the uint64_t operands
+{ uint32_t x12 = (x8 | x11);
+{ uint32_t x13 = (x6 | x12);
+{ uint32_t x14 = (x4 | x13);
+{ uint32_t x15 = (x2 | x14);
+out[0] = x15; // single output word, widened back to uint64_t on store
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e190m11/fenz.h b/src/Specific/montgomery32_2e190m11/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declares fenz (body in fenz.c): writes one word, the OR of the six inputs, to out[0].
diff --git a/src/Specific/montgomery32_2e190m11/feopp.c b/src/Specific/montgomery32_2e190m11/feopp.c
new file mode 100644
index 000000000..c888de137
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - x over six 32-bit limbs (x2 first, x9 last), then adds back the masked modulus 2^190 - 11.
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); // borrow chain for 0 - (x2, x4, x6, x8, x10, x9)
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); // mask from the final borrow x28; presumably all-ones when x28 is nonzero (cmovznz lives in liblow.h, not shown) - confirm
+{ uint32_t x30 = (x29 & 0xfffffff5); // masked modulus limbs follow, lowest first: 0xfffffff5, 0xffffffff (x4), 0x3fffffff = 2^190 - 11
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xffffffff);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint32_t x50 = (x29 & 0x3fffffff);
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); // the carry-out of the last limb is discarded
+out[0] = x52; // out[0] gets the last limb of the chain, out[5] the first, matching the parameter order
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e190m11/feopp.h b/src/Specific/montgomery32_2e190m11/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declares feopp (body in feopp.c): takes one six-limb operand, x9..x2, and writes six result words to out[0..5].
diff --git a/src/Specific/montgomery32_2e190m11/fesub.c b/src/Specific/montgomery32_2e190m11/fesub.c
new file mode 100644
index 000000000..c51ae4448
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtraction: computes (x5..x12) - (x15..x22) over six 32-bit limbs, then adds back the masked modulus 2^190 - 11.
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // borrow chain, starting at limbs x5/x15 and ending at x12/x22
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // mask from the final borrow x41; presumably all-ones when x41 is nonzero (cmovznz lives in liblow.h, not shown) - confirm
+{ uint32_t x43 = (x42 & 0xfffffff5); // masked modulus limbs follow, lowest first: 0xfffffff5, 0xffffffff (x4), 0x3fffffff = 2^190 - 11
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xffffffff);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint32_t x63 = (x42 & 0x3fffffff);
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // the carry-out of the last limb is discarded
+out[0] = x65; // out[0] gets the last limb of the chain, out[5] the first, matching the parameter order
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e190m11/fesub.h b/src/Specific/montgomery32_2e190m11/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e190m11/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declares fesub (body in fesub.c): the minuend is x12..x5, the subtrahend is x22..x15, and six result words go to out[0..5].
diff --git a/src/Specific/montgomery32_2e191m19/feadd.c b/src/Specific/montgomery32_2e191m19/feadd.c
new file mode 100644
index 000000000..53b3f7549
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Addition: sums two six-limb operands (x5/x15 first, x12/x22 last), then conditionally subtracts the modulus 2^191 - 19.
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // carry chain for the raw sum; x41 is the carry out of the last limb
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40);
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffed, &x43); // trial subtraction of the modulus; its limbs 0xffffffed, 0xffffffff (x4), 0x7fffffff spell 2^191 - 19
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xffffffff, &x49);
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0x7fffffff, &x58);
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // propagate the borrow through the carry bit x41; x62 is the final borrow
+{ uint32_t x63 = cmovznz(x62, x58, x40); // choose between the subtracted and raw limb based on x62; cmovznz comes from liblow.h (not shown), presumably yielding the last argument when x62 is nonzero - confirm
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] gets the last limb of the chain, out[5] the first, matching the parameter order
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e191m19/feadd.h b/src/Specific/montgomery32_2e191m19/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declares feadd (body in feadd.c): takes two six-limb operands, x12..x5 and x22..x15, and writes six result words to out[0..5].
diff --git a/src/Specific/montgomery32_2e191m19/femul.c b/src/Specific/montgomery32_2e191m19/femul.c
new file mode 100644
index 000000000..fdb2f44df
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer via the GCC mode(TI) attribute; femul below never references it
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // taken for real GCC only: clang and icc are excluded
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // redirect the intrinsic name to the GCC builtin; motivation is the bug report linked above
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // word-by-word Montgomery multiplication with six 32-bit limbs; A = x5,x7,x9,x11,x13,x12 and B = x15,x17,x19,x21,x23,x22 (least significant limb first); presumably yields A*B*2^-192 mod 2^191-19 - TODO confirm against the generator
+{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); // round 1: partial products x5 * B[i]; _mulx_u32 returns the low half and stores the high half (the uint64_t arguments are narrowed to 32 bits by the intrinsic's parameter types)
+{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); // add each high half to the next low half, giving the 7-limb row x25,x43,x46,x49,x52,x55,x58
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58); // top limb = last carry + highest high half; the carry out of this add is dropped into '_'
+{ uint32_t _; uint32_t x61 = _mulx_u32(x25, 0x286bca1b, &_); // reduction factor: low 32 bits of x25 * 0x286bca1b (19 * 0x286bca1b == 1 mod 2^32); high half discarded
+{ uint32_t x65; uint32_t x64 = _mulx_u32(x61, 0xffffffed, &x65); // x61 times the modulus limbs 0xffffffed, 0xffffffff (x4), 0x7fffffff, i.e. 2^191-19 least significant limb first
+{ uint32_t x68; uint32_t x67 = _mulx_u32(x61, 0xffffffff, &x68);
+{ uint32_t x71; uint32_t x70 = _mulx_u32(x61, 0xffffffff, &x71);
+{ uint32_t x74; uint32_t x73 = _mulx_u32(x61, 0xffffffff, &x74);
+{ uint32_t x77; uint32_t x76 = _mulx_u32(x61, 0xffffffff, &x77);
+{ uint32_t x80; uint32_t x79 = _mulx_u32(x61, 0x7fffffff, &x80);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(0x0, x65, x67, &x82); // assemble that product into the row x64,x82,x85,x88,x91,x94,x97
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x77, x79, &x94);
+{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x80, &x97);
+{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x25, x64, &_); // add both rows; the lowest sum is written to the throwaway '_' and only its carry x101 is used
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x43, x82, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x46, x85, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x49, x88, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x52, x91, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x55, x94, &x115);
+{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x58, x97, &x118); // running value after round 1: x103,x106,x109,x112,x115,x118 plus carry x119
+{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x15, &x122); // round 2: partial products x7 * B[i]
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x22, &x137);
+{ uint32_t x139; uint8_t x140 = _addcarryx_u32(0x0, x122, x124, &x139); // assemble the row x121,x139,x142,x145,x148,x151,x154
+{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x134, x136, &x151);
+{ uint32_t x154; uint8_t _ = _addcarryx_u32(0x0, x152, x137, &x154);
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(0x0, x103, x121, &x157); // accumulate the new row onto the running value (x119 enters as the top limb)
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x118, x151, &x172);
+{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x119, x154, &x175);
+{ uint32_t _; uint32_t x178 = _mulx_u32(x157, 0x286bca1b, &_); // reduction factor for round 2, derived from the lowest accumulated limb x157
+{ uint32_t x182; uint32_t x181 = _mulx_u32(x178, 0xffffffed, &x182); // x178 times the modulus limbs
+{ uint32_t x185; uint32_t x184 = _mulx_u32(x178, 0xffffffff, &x185);
+{ uint32_t x188; uint32_t x187 = _mulx_u32(x178, 0xffffffff, &x188);
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x178, 0xffffffff, &x191);
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x178, 0xffffffff, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x178, 0x7fffffff, &x197);
+{ uint32_t x199; uint8_t x200 = _addcarryx_u32(0x0, x182, x184, &x199); // assemble the row x181,x199,x202,x205,x208,x211,x214
+{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+{ uint32_t x208; uint8_t x209 = _addcarryx_u32(x206, x191, x193, &x208);
+{ uint32_t x211; uint8_t x212 = _addcarryx_u32(x209, x194, x196, &x211);
+{ uint32_t x214; uint8_t _ = _addcarryx_u32(0x0, x212, x197, &x214);
+{ uint32_t _; uint8_t x218 = _addcarryx_u32(0x0, x157, x181, &_); // add the reduction row; lowest sum discarded, carry x218 kept
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x160, x199, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x163, x202, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x166, x205, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x169, x208, &x229);
+{ uint32_t x232; uint8_t x233 = _addcarryx_u32(x230, x172, x211, &x232);
+{ uint32_t x235; uint8_t x236 = _addcarryx_u32(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // merge the two top carries of round 2 into one extra limb
+{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x15, &x240); // round 3: partial products x9 * B[i]
+{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x17, &x243);
+{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x19, &x246);
+{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x21, &x249);
+{ uint32_t x252; uint32_t x251 = _mulx_u32(x9, x23, &x252);
+{ uint32_t x255; uint32_t x254 = _mulx_u32(x9, x22, &x255);
+{ uint32_t x257; uint8_t x258 = _addcarryx_u32(0x0, x240, x242, &x257); // assemble the row x239,x257,x260,x263,x266,x269,x272
+{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+{ uint32_t x266; uint8_t x267 = _addcarryx_u32(x264, x249, x251, &x266);
+{ uint32_t x269; uint8_t x270 = _addcarryx_u32(x267, x252, x254, &x269);
+{ uint32_t x272; uint8_t _ = _addcarryx_u32(0x0, x270, x255, &x272);
+{ uint32_t x275; uint8_t x276 = _addcarryx_u32(0x0, x220, x239, &x275); // accumulate onto the running value (x237 enters as the top limb)
+{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x232, x266, &x287);
+{ uint32_t x290; uint8_t x291 = _addcarryx_u32(x288, x235, x269, &x290);
+{ uint32_t x293; uint8_t x294 = _addcarryx_u32(x291, x237, x272, &x293);
+{ uint32_t _; uint32_t x296 = _mulx_u32(x275, 0x286bca1b, &_); // reduction factor for round 3
+{ uint32_t x300; uint32_t x299 = _mulx_u32(x296, 0xffffffed, &x300); // x296 times the modulus limbs
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x296, 0xffffffff, &x303);
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x296, 0xffffffff, &x306);
+{ uint32_t x309; uint32_t x308 = _mulx_u32(x296, 0xffffffff, &x309);
+{ uint32_t x312; uint32_t x311 = _mulx_u32(x296, 0xffffffff, &x312);
+{ uint32_t x315; uint32_t x314 = _mulx_u32(x296, 0x7fffffff, &x315);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(0x0, x300, x302, &x317); // assemble the row x299,x317,x320,x323,x326,x329,x332
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+{ uint32_t x323; uint8_t x324 = _addcarryx_u32(x321, x306, x308, &x323);
+{ uint32_t x326; uint8_t x327 = _addcarryx_u32(x324, x309, x311, &x326);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x312, x314, &x329);
+{ uint32_t x332; uint8_t _ = _addcarryx_u32(0x0, x330, x315, &x332);
+{ uint32_t _; uint8_t x336 = _addcarryx_u32(0x0, x275, x299, &_); // add the reduction row; lowest sum discarded, carry x336 kept
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x278, x317, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x281, x320, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x284, x323, &x344);
+{ uint32_t x347; uint8_t x348 = _addcarryx_u32(x345, x287, x326, &x347);
+{ uint32_t x350; uint8_t x351 = _addcarryx_u32(x348, x290, x329, &x350);
+{ uint32_t x353; uint8_t x354 = _addcarryx_u32(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294); // merge the two top carries of round 3
+{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x15, &x358); // round 4: partial products x11 * B[i]
+{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x17, &x361);
+{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x19, &x364);
+{ uint32_t x367; uint32_t x366 = _mulx_u32(x11, x21, &x367);
+{ uint32_t x370; uint32_t x369 = _mulx_u32(x11, x23, &x370);
+{ uint32_t x373; uint32_t x372 = _mulx_u32(x11, x22, &x373);
+{ uint32_t x375; uint8_t x376 = _addcarryx_u32(0x0, x358, x360, &x375); // assemble the row x357,x375,x378,x381,x384,x387,x390
+{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+{ uint32_t x381; uint8_t x382 = _addcarryx_u32(x379, x364, x366, &x381);
+{ uint32_t x384; uint8_t x385 = _addcarryx_u32(x382, x367, x369, &x384);
+{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x370, x372, &x387);
+{ uint32_t x390; uint8_t _ = _addcarryx_u32(0x0, x388, x373, &x390);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(0x0, x338, x357, &x393); // accumulate onto the running value (x355 enters as the top limb)
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x347, x381, &x402);
+{ uint32_t x405; uint8_t x406 = _addcarryx_u32(x403, x350, x384, &x405);
+{ uint32_t x408; uint8_t x409 = _addcarryx_u32(x406, x353, x387, &x408);
+{ uint32_t x411; uint8_t x412 = _addcarryx_u32(x409, x355, x390, &x411);
+{ uint32_t _; uint32_t x414 = _mulx_u32(x393, 0x286bca1b, &_); // reduction factor for round 4
+{ uint32_t x418; uint32_t x417 = _mulx_u32(x414, 0xffffffed, &x418); // x414 times the modulus limbs
+{ uint32_t x421; uint32_t x420 = _mulx_u32(x414, 0xffffffff, &x421);
+{ uint32_t x424; uint32_t x423 = _mulx_u32(x414, 0xffffffff, &x424);
+{ uint32_t x427; uint32_t x426 = _mulx_u32(x414, 0xffffffff, &x427);
+{ uint32_t x430; uint32_t x429 = _mulx_u32(x414, 0xffffffff, &x430);
+{ uint32_t x433; uint32_t x432 = _mulx_u32(x414, 0x7fffffff, &x433);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(0x0, x418, x420, &x435); // assemble the row x417,x435,x438,x441,x444,x447,x450
+{ uint32_t x438; uint8_t x439 = _addcarryx_u32(x436, x421, x423, &x438);
+{ uint32_t x441; uint8_t x442 = _addcarryx_u32(x439, x424, x426, &x441);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x427, x429, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x430, x432, &x447);
+{ uint32_t x450; uint8_t _ = _addcarryx_u32(0x0, x448, x433, &x450);
+{ uint32_t _; uint8_t x454 = _addcarryx_u32(0x0, x393, x417, &_); // add the reduction row; lowest sum discarded, carry x454 kept
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x396, x435, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x399, x438, &x459);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x402, x441, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x405, x444, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x408, x447, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412); // merge the two top carries of round 4
+{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x15, &x476); // round 5: partial products x13 * B[i]
+{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x17, &x479);
+{ uint32_t x482; uint32_t x481 = _mulx_u32(x13, x19, &x482);
+{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x21, &x485);
+{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x23, &x488);
+{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x22, &x491);
+{ uint32_t x493; uint8_t x494 = _addcarryx_u32(0x0, x476, x478, &x493); // assemble the row x475,x493,x496,x499,x502,x505,x508
+{ uint32_t x496; uint8_t x497 = _addcarryx_u32(x494, x479, x481, &x496);
+{ uint32_t x499; uint8_t x500 = _addcarryx_u32(x497, x482, x484, &x499);
+{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x485, x487, &x502);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x488, x490, &x505);
+{ uint32_t x508; uint8_t _ = _addcarryx_u32(0x0, x506, x491, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(0x0, x456, x475, &x511); // accumulate onto the running value (x473 enters as the top limb)
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x462, x496, &x517);
+{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x465, x499, &x520);
+{ uint32_t x523; uint8_t x524 = _addcarryx_u32(x521, x468, x502, &x523);
+{ uint32_t x526; uint8_t x527 = _addcarryx_u32(x524, x471, x505, &x526);
+{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x473, x508, &x529);
+{ uint32_t _; uint32_t x532 = _mulx_u32(x511, 0x286bca1b, &_); // reduction factor for round 5
+{ uint32_t x536; uint32_t x535 = _mulx_u32(x532, 0xffffffed, &x536); // x532 times the modulus limbs
+{ uint32_t x539; uint32_t x538 = _mulx_u32(x532, 0xffffffff, &x539);
+{ uint32_t x542; uint32_t x541 = _mulx_u32(x532, 0xffffffff, &x542);
+{ uint32_t x545; uint32_t x544 = _mulx_u32(x532, 0xffffffff, &x545);
+{ uint32_t x548; uint32_t x547 = _mulx_u32(x532, 0xffffffff, &x548);
+{ uint32_t x551; uint32_t x550 = _mulx_u32(x532, 0x7fffffff, &x551);
+{ uint32_t x553; uint8_t x554 = _addcarryx_u32(0x0, x536, x538, &x553); // assemble the row x535,x553,x556,x559,x562,x565,x568
+{ uint32_t x556; uint8_t x557 = _addcarryx_u32(x554, x539, x541, &x556);
+{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x542, x544, &x559);
+{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x545, x547, &x562);
+{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x548, x550, &x565);
+{ uint32_t x568; uint8_t _ = _addcarryx_u32(0x0, x566, x551, &x568);
+{ uint32_t _; uint8_t x572 = _addcarryx_u32(0x0, x511, x535, &_); // add the reduction row; lowest sum discarded, carry x572 kept
+{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x514, x553, &x574);
+{ uint32_t x577; uint8_t x578 = _addcarryx_u32(x575, x517, x556, &x577);
+{ uint32_t x580; uint8_t x581 = _addcarryx_u32(x578, x520, x559, &x580);
+{ uint32_t x583; uint8_t x584 = _addcarryx_u32(x581, x523, x562, &x583);
+{ uint32_t x586; uint8_t x587 = _addcarryx_u32(x584, x526, x565, &x586);
+{ uint32_t x589; uint8_t x590 = _addcarryx_u32(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530); // merge the two top carries of round 5
+{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x15, &x594); // round 6: partial products x12 * B[i]
+{ uint32_t x597; uint32_t x596 = _mulx_u32(x12, x17, &x597);
+{ uint32_t x600; uint32_t x599 = _mulx_u32(x12, x19, &x600);
+{ uint32_t x603; uint32_t x602 = _mulx_u32(x12, x21, &x603);
+{ uint32_t x606; uint32_t x605 = _mulx_u32(x12, x23, &x606);
+{ uint32_t x609; uint32_t x608 = _mulx_u32(x12, x22, &x609);
+{ uint32_t x611; uint8_t x612 = _addcarryx_u32(0x0, x594, x596, &x611); // assemble the row x593,x611,x614,x617,x620,x623,x626
+{ uint32_t x614; uint8_t x615 = _addcarryx_u32(x612, x597, x599, &x614);
+{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x600, x602, &x617);
+{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x603, x605, &x620);
+{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x606, x608, &x623);
+{ uint32_t x626; uint8_t _ = _addcarryx_u32(0x0, x624, x609, &x626);
+{ uint32_t x629; uint8_t x630 = _addcarryx_u32(0x0, x574, x593, &x629); // accumulate onto the running value (x591 enters as the top limb)
+{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x577, x611, &x632);
+{ uint32_t x635; uint8_t x636 = _addcarryx_u32(x633, x580, x614, &x635);
+{ uint32_t x638; uint8_t x639 = _addcarryx_u32(x636, x583, x617, &x638);
+{ uint32_t x641; uint8_t x642 = _addcarryx_u32(x639, x586, x620, &x641);
+{ uint32_t x644; uint8_t x645 = _addcarryx_u32(x642, x589, x623, &x644);
+{ uint32_t x647; uint8_t x648 = _addcarryx_u32(x645, x591, x626, &x647);
+{ uint32_t _; uint32_t x650 = _mulx_u32(x629, 0x286bca1b, &_); // reduction factor for round 6
+{ uint32_t x654; uint32_t x653 = _mulx_u32(x650, 0xffffffed, &x654); // x650 times the modulus limbs
+{ uint32_t x657; uint32_t x656 = _mulx_u32(x650, 0xffffffff, &x657);
+{ uint32_t x660; uint32_t x659 = _mulx_u32(x650, 0xffffffff, &x660);
+{ uint32_t x663; uint32_t x662 = _mulx_u32(x650, 0xffffffff, &x663);
+{ uint32_t x666; uint32_t x665 = _mulx_u32(x650, 0xffffffff, &x666);
+{ uint32_t x669; uint32_t x668 = _mulx_u32(x650, 0x7fffffff, &x669);
+{ uint32_t x671; uint8_t x672 = _addcarryx_u32(0x0, x654, x656, &x671); // assemble the row x653,x671,x674,x677,x680,x683,x686
+{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x657, x659, &x674);
+{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x660, x662, &x677);
+{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x663, x665, &x680);
+{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x666, x668, &x683);
+{ uint32_t x686; uint8_t _ = _addcarryx_u32(0x0, x684, x669, &x686);
+{ uint32_t _; uint8_t x690 = _addcarryx_u32(0x0, x629, x653, &_); // add the reduction row; lowest sum discarded, carry x690 kept
+{ uint32_t x692; uint8_t x693 = _addcarryx_u32(x690, x632, x671, &x692);
+{ uint32_t x695; uint8_t x696 = _addcarryx_u32(x693, x635, x674, &x695);
+{ uint32_t x698; uint8_t x699 = _addcarryx_u32(x696, x638, x677, &x698);
+{ uint32_t x701; uint8_t x702 = _addcarryx_u32(x699, x641, x680, &x701);
+{ uint32_t x704; uint8_t x705 = _addcarryx_u32(x702, x644, x683, &x704);
+{ uint32_t x707; uint8_t x708 = _addcarryx_u32(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // merge the two top carries of round 6; serves as the 7th limb below
+{ uint32_t x711; uint8_t x712 = _subborrow_u32(0x0, x692, 0xffffffed, &x711); // trial subtraction of the modulus limbs from x692,x695,x698,x701,x704,x707
+{ uint32_t x714; uint8_t x715 = _subborrow_u32(x712, x695, 0xffffffff, &x714);
+{ uint32_t x717; uint8_t x718 = _subborrow_u32(x715, x698, 0xffffffff, &x717);
+{ uint32_t x720; uint8_t x721 = _subborrow_u32(x718, x701, 0xffffffff, &x720);
+{ uint32_t x723; uint8_t x724 = _subborrow_u32(x721, x704, 0xffffffff, &x723);
+{ uint32_t x726; uint8_t x727 = _subborrow_u32(x724, x707, 0x7fffffff, &x726);
+{ uint32_t _; uint8_t x730 = _subborrow_u32(x727, x709, 0x0, &_); // propagate the borrow through the 7th limb x709; only the final borrow x730 is kept
+{ uint32_t x731 = cmovznz(x730, x726, x707); // per-limb choice between the difference and the unsubtracted limb; cmovznz lives in liblow.h (not shown) and presumably returns its 2nd argument when x730 == 0 - TODO confirm
+{ uint32_t x732 = cmovznz(x730, x723, x704);
+{ uint32_t x733 = cmovznz(x730, x720, x701);
+{ uint32_t x734 = cmovznz(x730, x717, x698);
+{ uint32_t x735 = cmovznz(x730, x714, x695);
+{ uint32_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // stored most significant first: out[0] is the limb at the end of the carry chain, out[5] the one at its start; each 32-bit value is zero-extended into its uint64_t slot
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e191m19/femul.h b/src/Specific/montgomery32_2e191m19/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // prototype for femul.c; that file's "caller:" note asks for uint64_t out[6]
diff --git a/src/Specific/montgomery32_2e191m19/fenz.c b/src/Specific/montgomery32_2e191m19/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer via the GCC mode(TI) attribute; fenz below never references it
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // taken for real GCC only: clang and icc are excluded
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // redirect the intrinsic name to the GCC builtin; fenz itself performs no subtraction
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // nonzero test: writes the bitwise OR of the six inputs to out[0], which is 0 exactly when the low 32 bits of every input are 0
+{ uint32_t x11 = (x10 | x9); // every intermediate is uint32_t, so bits 32..63 of the uint64_t inputs are dropped at each step
+{ uint32_t x12 = (x8 | x11);
+{ uint32_t x13 = (x6 | x12);
+{ uint32_t x14 = (x4 | x13);
+{ uint32_t x15 = (x2 | x14);
+out[0] = x15; // the only element written; the 32-bit value is zero-extended into the uint64_t slot
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e191m19/fenz.h b/src/Specific/montgomery32_2e191m19/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for fenz.c; that file's "caller:" note asks for uint64_t out[1]
diff --git a/src/Specific/montgomery32_2e191m19/feopp.c b/src/Specific/montgomery32_2e191m19/feopp.c
new file mode 100644
index 000000000..e27f085de
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer via the GCC mode(TI) attribute; feopp below never references it
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // taken for real GCC only: clang and icc are excluded
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // redirect the intrinsic name to the GCC builtin; motivation is the bug report linked above
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // negation for the 2^191-19 field: computes 0 - x over six 32-bit limbs (x2,x4,x6,x8,x10,x9, least significant first) and then adds the masked modulus
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); // limb-wise 0 - x; the borrow chain runs x13, x16, x19, x22, x25, x28
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); // mask derived from the final borrow; cmovznz lives in liblow.h (not shown) and presumably yields 0 when x28 == 0 and 0xffffffff otherwise - TODO confirm
+{ uint32_t x30 = (x29 & 0xffffffed); // masked modulus, lowest limb (2^191-19 has limbs 0xffffffed, 0xffffffff x4, 0x7fffffff)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32); // add the masked modulus back, limb by limb with carry
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xffffffff);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint32_t x50 = (x29 & 0x7fffffff); // masked modulus, top limb
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); // the carry out of the top limb is dropped into '_'
+out[0] = x52; // stored most significant first: out[0] is the limb at the end of the carry chain, out[5] the one at its start
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e191m19/feopp.h b/src/Specific/montgomery32_2e191m19/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for feopp.c; that file's "caller:" note asks for uint64_t out[6]
diff --git a/src/Specific/montgomery32_2e191m19/fesub.c b/src/Specific/montgomery32_2e191m19/fesub.c
new file mode 100644
index 000000000..ff4740345
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer via the GCC mode(TI) attribute; fesub below never references it
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // taken for real GCC only: clang and icc are excluded
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // redirect the intrinsic name to the GCC builtin; motivation is the bug report linked above
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // subtraction for the 2^191-19 field: A - B over six 32-bit limbs, A = x5,x7,x9,x11,x13,x12 and B = x15,x17,x19,x21,x23,x22 (least significant first), followed by adding the masked modulus
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // limb-wise A - B; the borrow chain runs x26, x29, x32, x35, x38, x41
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // mask derived from the final borrow; cmovznz lives in liblow.h (not shown) and presumably yields 0 when x41 == 0 and 0xffffffff otherwise - TODO confirm
+{ uint32_t x43 = (x42 & 0xffffffed); // masked modulus, lowest limb (2^191-19 has limbs 0xffffffed, 0xffffffff x4, 0x7fffffff)
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45); // add the masked modulus back, limb by limb with carry
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xffffffff);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint32_t x63 = (x42 & 0x7fffffff); // masked modulus, top limb
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // the carry out of the top limb is dropped into '_'
+out[0] = x65; // stored most significant first: out[0] is the limb at the end of the carry chain, out[5] the one at its start
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e191m19/fesub.h b/src/Specific/montgomery32_2e191m19/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e191m19/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // prototype for fesub.c; that file's "caller:" note asks for uint64_t out[6]
diff --git a/src/Specific/montgomery32_2e192m2e64m1/feadd.c b/src/Specific/montgomery32_2e192m2e64m1/feadd.c
new file mode 100644
index 000000000..555a4f75f
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer via the GCC mode(TI) attribute; feadd below never references it
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // taken for real GCC only: clang and icc are excluded
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // redirect the intrinsic name to the GCC builtin; motivation is the bug report linked above
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // addition for the 2^192-2^64-1 field: A + B over six 32-bit limbs, A = x5,x7,x9,x11,x13,x12 and B = x15,x17,x19,x21,x23,x22 (least significant first), followed by one conditional subtraction of the modulus
+{ uint32_t x25; uint8_t x26 = _addcarryx_u32(0x0, x5, x15, &x25); // limb-wise A + B; the carry chain runs x26, x29, x32, x35, x38, x41
+{ uint32_t x28; uint8_t x29 = _addcarryx_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _addcarryx_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _addcarryx_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x12, x22, &x40); // x41 = carry out of the top limb, treated as a 7th limb below
+{ uint32_t x43; uint8_t x44 = _subborrow_u32(0x0, x25, 0xffffffff, &x43); // trial subtraction of the modulus limbs 0xffffffff, 0xffffffff, 0xfffffffe, 0xffffffff, 0xffffffff, 0xffffffff (least significant first)
+{ uint32_t x46; uint8_t x47 = _subborrow_u32(x44, x28, 0xffffffff, &x46);
+{ uint32_t x49; uint8_t x50 = _subborrow_u32(x47, x31, 0xfffffffe, &x49); // the 0xfffffffe limb covers bits 64..95, which is where the -2^64 term of the modulus lands
+{ uint32_t x52; uint8_t x53 = _subborrow_u32(x50, x34, 0xffffffff, &x52);
+{ uint32_t x55; uint8_t x56 = _subborrow_u32(x53, x37, 0xffffffff, &x55);
+{ uint32_t x58; uint8_t x59 = _subborrow_u32(x56, x40, 0xffffffff, &x58);
+{ uint32_t _; uint8_t x62 = _subborrow_u32(x59, x41, 0x0, &_); // propagate the borrow through the carry limb x41; only the final borrow x62 is kept
+{ uint32_t x63 = cmovznz(x62, x58, x40); // per-limb choice between the difference and the plain sum; cmovznz lives in liblow.h (not shown) and presumably returns its 2nd argument when x62 == 0 - TODO confirm
+{ uint32_t x64 = cmovznz(x62, x55, x37);
+{ uint32_t x65 = cmovznz(x62, x52, x34);
+{ uint32_t x66 = cmovznz(x62, x49, x31);
+{ uint32_t x67 = cmovznz(x62, x46, x28);
+{ uint32_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // stored most significant first: out[0] is the limb at the end of the carry chain, out[5] the one at its start
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e192m2e64m1/feadd.h b/src/Specific/montgomery32_2e192m2e64m1/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // attribute only; the macro does not add the `inline` keyword
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // prototype for feadd.c; that file's "caller:" note asks for uint64_t out[6]
diff --git a/src/Specific/montgomery32_2e192m2e64m1/femul.c b/src/Specific/montgomery32_2e192m2e64m1/femul.c
new file mode 100644
index 000000000..11d0723f4
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/femul.c
@@ -0,0 +1,266 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Multiplies limbs x5,x7,x9,x11,x13,x12 by limbs x15,x17,x19,x21,x23,x22, one row per first-operand limb, with a reduction step after each row; looks like word-by-word Montgomery multiplication mod 2^192 - 2^64 - 1 (per the directory name) -- TODO confirm against the generator.
+{ uint32_t x26; uint32_t x25 = _mulx_u32(x5, x15, &x26); // row 1: x5 times each second-operand limb (per the Intel intrinsic: return value = low word, pointer = high word)
+{ uint32_t x29; uint32_t x28 = _mulx_u32(x5, x17, &x29);
+{ uint32_t x32; uint32_t x31 = _mulx_u32(x5, x19, &x32);
+{ uint32_t x35; uint32_t x34 = _mulx_u32(x5, x21, &x35);
+{ uint32_t x38; uint32_t x37 = _mulx_u32(x5, x23, &x38);
+{ uint32_t x41; uint32_t x40 = _mulx_u32(x5, x22, &x41);
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(0x0, x26, x28, &x43); // carry chain: each high word is added to the next product's low word
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x29, x31, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x32, x34, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x35, x37, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x38, x40, &x55);
+{ uint32_t x58; uint8_t _ = _addcarryx_u32(0x0, x56, x41, &x58); // last carry x56 is added to the top high word as an operand; the carry-out goes to the throwaway `_`
+{ uint32_t x62; uint32_t x61 = _mulx_u32(x25, 0xffffffff, &x62); // reduction for row 1: x25 times the constants 0xffffffff,0xffffffff,0xfffffffe,0xffffffff,0xffffffff,0xffffffff
+{ uint32_t x65; uint32_t x64 = _mulx_u32(x25, 0xffffffff, &x65);
+{ uint32_t x68; uint32_t x67 = _mulx_u32(x25, 0xfffffffe, &x68);
+{ uint32_t x71; uint32_t x70 = _mulx_u32(x25, 0xffffffff, &x71);
+{ uint32_t x74; uint32_t x73 = _mulx_u32(x25, 0xffffffff, &x74);
+{ uint32_t x77; uint32_t x76 = _mulx_u32(x25, 0xffffffff, &x77);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(0x0, x62, x64, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x65, x67, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x68, x70, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x71, x73, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x74, x76, &x91);
+{ uint32_t x94; uint8_t _ = _addcarryx_u32(0x0, x92, x77, &x94);
+{ uint32_t _; uint8_t x98 = _addcarryx_u32(0x0, x25, x61, &_); // the low sum word is written to `_` and never read; only its carry x98 is used
+{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x43, x79, &x100);
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x46, x82, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x49, x85, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x52, x88, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x55, x91, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x58, x94, &x115);
+{ uint32_t x119; uint32_t x118 = _mulx_u32(x7, x15, &x119); // row 2: x7 times each second-operand limb
+{ uint32_t x122; uint32_t x121 = _mulx_u32(x7, x17, &x122);
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x19, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x21, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x23, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x22, &x134);
+{ uint32_t x136; uint8_t x137 = _addcarryx_u32(0x0, x119, x121, &x136);
+{ uint32_t x139; uint8_t x140 = _addcarryx_u32(x137, x122, x124, &x139);
+{ uint32_t x142; uint8_t x143 = _addcarryx_u32(x140, x125, x127, &x142);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(x143, x128, x130, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x131, x133, &x148);
+{ uint32_t x151; uint8_t _ = _addcarryx_u32(0x0, x149, x134, &x151);
+{ uint32_t x154; uint8_t x155 = _addcarryx_u32(0x0, x100, x118, &x154); // add row 2 onto the running value left by the previous reduction
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(x155, x103, x136, &x157);
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x106, x139, &x160);
+{ uint32_t x163; uint8_t x164 = _addcarryx_u32(x161, x109, x142, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(x164, x112, x145, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x115, x148, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x116, x151, &x172); // previous top carry x116 enters here as an operand
+{ uint32_t x176; uint32_t x175 = _mulx_u32(x154, 0xffffffff, &x176); // reduction for row 2: x154 times the same six constants
+{ uint32_t x179; uint32_t x178 = _mulx_u32(x154, 0xffffffff, &x179);
+{ uint32_t x182; uint32_t x181 = _mulx_u32(x154, 0xfffffffe, &x182);
+{ uint32_t x185; uint32_t x184 = _mulx_u32(x154, 0xffffffff, &x185);
+{ uint32_t x188; uint32_t x187 = _mulx_u32(x154, 0xffffffff, &x188);
+{ uint32_t x191; uint32_t x190 = _mulx_u32(x154, 0xffffffff, &x191);
+{ uint32_t x193; uint8_t x194 = _addcarryx_u32(0x0, x176, x178, &x193);
+{ uint32_t x196; uint8_t x197 = _addcarryx_u32(x194, x179, x181, &x196);
+{ uint32_t x199; uint8_t x200 = _addcarryx_u32(x197, x182, x184, &x199);
+{ uint32_t x202; uint8_t x203 = _addcarryx_u32(x200, x185, x187, &x202);
+{ uint32_t x205; uint8_t x206 = _addcarryx_u32(x203, x188, x190, &x205);
+{ uint32_t x208; uint8_t _ = _addcarryx_u32(0x0, x206, x191, &x208);
+{ uint32_t _; uint8_t x212 = _addcarryx_u32(0x0, x154, x175, &_);
+{ uint32_t x214; uint8_t x215 = _addcarryx_u32(x212, x157, x193, &x214);
+{ uint32_t x217; uint8_t x218 = _addcarryx_u32(x215, x160, x196, &x217);
+{ uint32_t x220; uint8_t x221 = _addcarryx_u32(x218, x163, x199, &x220);
+{ uint32_t x223; uint8_t x224 = _addcarryx_u32(x221, x166, x202, &x223);
+{ uint32_t x226; uint8_t x227 = _addcarryx_u32(x224, x169, x205, &x226);
+{ uint32_t x229; uint8_t x230 = _addcarryx_u32(x227, x172, x208, &x229);
+{ uint8_t x231 = (x230 + x173); // sum of the two top carries from this row, carried into the next row as an operand
+{ uint32_t x234; uint32_t x233 = _mulx_u32(x9, x15, &x234); // row 3: x9 times each second-operand limb
+{ uint32_t x237; uint32_t x236 = _mulx_u32(x9, x17, &x237);
+{ uint32_t x240; uint32_t x239 = _mulx_u32(x9, x19, &x240);
+{ uint32_t x243; uint32_t x242 = _mulx_u32(x9, x21, &x243);
+{ uint32_t x246; uint32_t x245 = _mulx_u32(x9, x23, &x246);
+{ uint32_t x249; uint32_t x248 = _mulx_u32(x9, x22, &x249);
+{ uint32_t x251; uint8_t x252 = _addcarryx_u32(0x0, x234, x236, &x251);
+{ uint32_t x254; uint8_t x255 = _addcarryx_u32(x252, x237, x239, &x254);
+{ uint32_t x257; uint8_t x258 = _addcarryx_u32(x255, x240, x242, &x257);
+{ uint32_t x260; uint8_t x261 = _addcarryx_u32(x258, x243, x245, &x260);
+{ uint32_t x263; uint8_t x264 = _addcarryx_u32(x261, x246, x248, &x263);
+{ uint32_t x266; uint8_t _ = _addcarryx_u32(0x0, x264, x249, &x266);
+{ uint32_t x269; uint8_t x270 = _addcarryx_u32(0x0, x214, x233, &x269);
+{ uint32_t x272; uint8_t x273 = _addcarryx_u32(x270, x217, x251, &x272);
+{ uint32_t x275; uint8_t x276 = _addcarryx_u32(x273, x220, x254, &x275);
+{ uint32_t x278; uint8_t x279 = _addcarryx_u32(x276, x223, x257, &x278);
+{ uint32_t x281; uint8_t x282 = _addcarryx_u32(x279, x226, x260, &x281);
+{ uint32_t x284; uint8_t x285 = _addcarryx_u32(x282, x229, x263, &x284);
+{ uint32_t x287; uint8_t x288 = _addcarryx_u32(x285, x231, x266, &x287);
+{ uint32_t x291; uint32_t x290 = _mulx_u32(x269, 0xffffffff, &x291); // reduction for row 3: x269 times the same six constants
+{ uint32_t x294; uint32_t x293 = _mulx_u32(x269, 0xffffffff, &x294);
+{ uint32_t x297; uint32_t x296 = _mulx_u32(x269, 0xfffffffe, &x297);
+{ uint32_t x300; uint32_t x299 = _mulx_u32(x269, 0xffffffff, &x300);
+{ uint32_t x303; uint32_t x302 = _mulx_u32(x269, 0xffffffff, &x303);
+{ uint32_t x306; uint32_t x305 = _mulx_u32(x269, 0xffffffff, &x306);
+{ uint32_t x308; uint8_t x309 = _addcarryx_u32(0x0, x291, x293, &x308);
+{ uint32_t x311; uint8_t x312 = _addcarryx_u32(x309, x294, x296, &x311);
+{ uint32_t x314; uint8_t x315 = _addcarryx_u32(x312, x297, x299, &x314);
+{ uint32_t x317; uint8_t x318 = _addcarryx_u32(x315, x300, x302, &x317);
+{ uint32_t x320; uint8_t x321 = _addcarryx_u32(x318, x303, x305, &x320);
+{ uint32_t x323; uint8_t _ = _addcarryx_u32(0x0, x321, x306, &x323);
+{ uint32_t _; uint8_t x327 = _addcarryx_u32(0x0, x269, x290, &_);
+{ uint32_t x329; uint8_t x330 = _addcarryx_u32(x327, x272, x308, &x329);
+{ uint32_t x332; uint8_t x333 = _addcarryx_u32(x330, x275, x311, &x332);
+{ uint32_t x335; uint8_t x336 = _addcarryx_u32(x333, x278, x314, &x335);
+{ uint32_t x338; uint8_t x339 = _addcarryx_u32(x336, x281, x317, &x338);
+{ uint32_t x341; uint8_t x342 = _addcarryx_u32(x339, x284, x320, &x341);
+{ uint32_t x344; uint8_t x345 = _addcarryx_u32(x342, x287, x323, &x344);
+{ uint8_t x346 = (x345 + x288);
+{ uint32_t x349; uint32_t x348 = _mulx_u32(x11, x15, &x349); // row 4: x11 times each second-operand limb
+{ uint32_t x352; uint32_t x351 = _mulx_u32(x11, x17, &x352);
+{ uint32_t x355; uint32_t x354 = _mulx_u32(x11, x19, &x355);
+{ uint32_t x358; uint32_t x357 = _mulx_u32(x11, x21, &x358);
+{ uint32_t x361; uint32_t x360 = _mulx_u32(x11, x23, &x361);
+{ uint32_t x364; uint32_t x363 = _mulx_u32(x11, x22, &x364);
+{ uint32_t x366; uint8_t x367 = _addcarryx_u32(0x0, x349, x351, &x366);
+{ uint32_t x369; uint8_t x370 = _addcarryx_u32(x367, x352, x354, &x369);
+{ uint32_t x372; uint8_t x373 = _addcarryx_u32(x370, x355, x357, &x372);
+{ uint32_t x375; uint8_t x376 = _addcarryx_u32(x373, x358, x360, &x375);
+{ uint32_t x378; uint8_t x379 = _addcarryx_u32(x376, x361, x363, &x378);
+{ uint32_t x381; uint8_t _ = _addcarryx_u32(0x0, x379, x364, &x381);
+{ uint32_t x384; uint8_t x385 = _addcarryx_u32(0x0, x329, x348, &x384);
+{ uint32_t x387; uint8_t x388 = _addcarryx_u32(x385, x332, x366, &x387);
+{ uint32_t x390; uint8_t x391 = _addcarryx_u32(x388, x335, x369, &x390);
+{ uint32_t x393; uint8_t x394 = _addcarryx_u32(x391, x338, x372, &x393);
+{ uint32_t x396; uint8_t x397 = _addcarryx_u32(x394, x341, x375, &x396);
+{ uint32_t x399; uint8_t x400 = _addcarryx_u32(x397, x344, x378, &x399);
+{ uint32_t x402; uint8_t x403 = _addcarryx_u32(x400, x346, x381, &x402);
+{ uint32_t x406; uint32_t x405 = _mulx_u32(x384, 0xffffffff, &x406); // reduction for row 4: x384 times the same six constants
+{ uint32_t x409; uint32_t x408 = _mulx_u32(x384, 0xffffffff, &x409);
+{ uint32_t x412; uint32_t x411 = _mulx_u32(x384, 0xfffffffe, &x412);
+{ uint32_t x415; uint32_t x414 = _mulx_u32(x384, 0xffffffff, &x415);
+{ uint32_t x418; uint32_t x417 = _mulx_u32(x384, 0xffffffff, &x418);
+{ uint32_t x421; uint32_t x420 = _mulx_u32(x384, 0xffffffff, &x421);
+{ uint32_t x423; uint8_t x424 = _addcarryx_u32(0x0, x406, x408, &x423);
+{ uint32_t x426; uint8_t x427 = _addcarryx_u32(x424, x409, x411, &x426);
+{ uint32_t x429; uint8_t x430 = _addcarryx_u32(x427, x412, x414, &x429);
+{ uint32_t x432; uint8_t x433 = _addcarryx_u32(x430, x415, x417, &x432);
+{ uint32_t x435; uint8_t x436 = _addcarryx_u32(x433, x418, x420, &x435);
+{ uint32_t x438; uint8_t _ = _addcarryx_u32(0x0, x436, x421, &x438);
+{ uint32_t _; uint8_t x442 = _addcarryx_u32(0x0, x384, x405, &_);
+{ uint32_t x444; uint8_t x445 = _addcarryx_u32(x442, x387, x423, &x444);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(x445, x390, x426, &x447);
+{ uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x393, x429, &x450);
+{ uint32_t x453; uint8_t x454 = _addcarryx_u32(x451, x396, x432, &x453);
+{ uint32_t x456; uint8_t x457 = _addcarryx_u32(x454, x399, x435, &x456);
+{ uint32_t x459; uint8_t x460 = _addcarryx_u32(x457, x402, x438, &x459);
+{ uint8_t x461 = (x460 + x403);
+{ uint32_t x464; uint32_t x463 = _mulx_u32(x13, x15, &x464); // row 5: x13 times each second-operand limb
+{ uint32_t x467; uint32_t x466 = _mulx_u32(x13, x17, &x467);
+{ uint32_t x470; uint32_t x469 = _mulx_u32(x13, x19, &x470);
+{ uint32_t x473; uint32_t x472 = _mulx_u32(x13, x21, &x473);
+{ uint32_t x476; uint32_t x475 = _mulx_u32(x13, x23, &x476);
+{ uint32_t x479; uint32_t x478 = _mulx_u32(x13, x22, &x479);
+{ uint32_t x481; uint8_t x482 = _addcarryx_u32(0x0, x464, x466, &x481);
+{ uint32_t x484; uint8_t x485 = _addcarryx_u32(x482, x467, x469, &x484);
+{ uint32_t x487; uint8_t x488 = _addcarryx_u32(x485, x470, x472, &x487);
+{ uint32_t x490; uint8_t x491 = _addcarryx_u32(x488, x473, x475, &x490);
+{ uint32_t x493; uint8_t x494 = _addcarryx_u32(x491, x476, x478, &x493);
+{ uint32_t x496; uint8_t _ = _addcarryx_u32(0x0, x494, x479, &x496);
+{ uint32_t x499; uint8_t x500 = _addcarryx_u32(0x0, x444, x463, &x499);
+{ uint32_t x502; uint8_t x503 = _addcarryx_u32(x500, x447, x481, &x502);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(x503, x450, x484, &x505);
+{ uint32_t x508; uint8_t x509 = _addcarryx_u32(x506, x453, x487, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(x509, x456, x490, &x511);
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x459, x493, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x461, x496, &x517);
+{ uint32_t x521; uint32_t x520 = _mulx_u32(x499, 0xffffffff, &x521); // reduction for row 5: x499 times the same six constants
+{ uint32_t x524; uint32_t x523 = _mulx_u32(x499, 0xffffffff, &x524);
+{ uint32_t x527; uint32_t x526 = _mulx_u32(x499, 0xfffffffe, &x527);
+{ uint32_t x530; uint32_t x529 = _mulx_u32(x499, 0xffffffff, &x530);
+{ uint32_t x533; uint32_t x532 = _mulx_u32(x499, 0xffffffff, &x533);
+{ uint32_t x536; uint32_t x535 = _mulx_u32(x499, 0xffffffff, &x536);
+{ uint32_t x538; uint8_t x539 = _addcarryx_u32(0x0, x521, x523, &x538);
+{ uint32_t x541; uint8_t x542 = _addcarryx_u32(x539, x524, x526, &x541);
+{ uint32_t x544; uint8_t x545 = _addcarryx_u32(x542, x527, x529, &x544);
+{ uint32_t x547; uint8_t x548 = _addcarryx_u32(x545, x530, x532, &x547);
+{ uint32_t x550; uint8_t x551 = _addcarryx_u32(x548, x533, x535, &x550);
+{ uint32_t x553; uint8_t _ = _addcarryx_u32(0x0, x551, x536, &x553);
+{ uint32_t _; uint8_t x557 = _addcarryx_u32(0x0, x499, x520, &_);
+{ uint32_t x559; uint8_t x560 = _addcarryx_u32(x557, x502, x538, &x559);
+{ uint32_t x562; uint8_t x563 = _addcarryx_u32(x560, x505, x541, &x562);
+{ uint32_t x565; uint8_t x566 = _addcarryx_u32(x563, x508, x544, &x565);
+{ uint32_t x568; uint8_t x569 = _addcarryx_u32(x566, x511, x547, &x568);
+{ uint32_t x571; uint8_t x572 = _addcarryx_u32(x569, x514, x550, &x571);
+{ uint32_t x574; uint8_t x575 = _addcarryx_u32(x572, x517, x553, &x574);
+{ uint8_t x576 = (x575 + x518);
+{ uint32_t x579; uint32_t x578 = _mulx_u32(x12, x15, &x579); // row 6 (last): x12 times each second-operand limb
+{ uint32_t x582; uint32_t x581 = _mulx_u32(x12, x17, &x582);
+{ uint32_t x585; uint32_t x584 = _mulx_u32(x12, x19, &x585);
+{ uint32_t x588; uint32_t x587 = _mulx_u32(x12, x21, &x588);
+{ uint32_t x591; uint32_t x590 = _mulx_u32(x12, x23, &x591);
+{ uint32_t x594; uint32_t x593 = _mulx_u32(x12, x22, &x594);
+{ uint32_t x596; uint8_t x597 = _addcarryx_u32(0x0, x579, x581, &x596);
+{ uint32_t x599; uint8_t x600 = _addcarryx_u32(x597, x582, x584, &x599);
+{ uint32_t x602; uint8_t x603 = _addcarryx_u32(x600, x585, x587, &x602);
+{ uint32_t x605; uint8_t x606 = _addcarryx_u32(x603, x588, x590, &x605);
+{ uint32_t x608; uint8_t x609 = _addcarryx_u32(x606, x591, x593, &x608);
+{ uint32_t x611; uint8_t _ = _addcarryx_u32(0x0, x609, x594, &x611);
+{ uint32_t x614; uint8_t x615 = _addcarryx_u32(0x0, x559, x578, &x614);
+{ uint32_t x617; uint8_t x618 = _addcarryx_u32(x615, x562, x596, &x617);
+{ uint32_t x620; uint8_t x621 = _addcarryx_u32(x618, x565, x599, &x620);
+{ uint32_t x623; uint8_t x624 = _addcarryx_u32(x621, x568, x602, &x623);
+{ uint32_t x626; uint8_t x627 = _addcarryx_u32(x624, x571, x605, &x626);
+{ uint32_t x629; uint8_t x630 = _addcarryx_u32(x627, x574, x608, &x629);
+{ uint32_t x632; uint8_t x633 = _addcarryx_u32(x630, x576, x611, &x632);
+{ uint32_t x636; uint32_t x635 = _mulx_u32(x614, 0xffffffff, &x636); // reduction for row 6: x614 times the same six constants
+{ uint32_t x639; uint32_t x638 = _mulx_u32(x614, 0xffffffff, &x639);
+{ uint32_t x642; uint32_t x641 = _mulx_u32(x614, 0xfffffffe, &x642);
+{ uint32_t x645; uint32_t x644 = _mulx_u32(x614, 0xffffffff, &x645);
+{ uint32_t x648; uint32_t x647 = _mulx_u32(x614, 0xffffffff, &x648);
+{ uint32_t x651; uint32_t x650 = _mulx_u32(x614, 0xffffffff, &x651);
+{ uint32_t x653; uint8_t x654 = _addcarryx_u32(0x0, x636, x638, &x653);
+{ uint32_t x656; uint8_t x657 = _addcarryx_u32(x654, x639, x641, &x656);
+{ uint32_t x659; uint8_t x660 = _addcarryx_u32(x657, x642, x644, &x659);
+{ uint32_t x662; uint8_t x663 = _addcarryx_u32(x660, x645, x647, &x662);
+{ uint32_t x665; uint8_t x666 = _addcarryx_u32(x663, x648, x650, &x665);
+{ uint32_t x668; uint8_t _ = _addcarryx_u32(0x0, x666, x651, &x668);
+{ uint32_t _; uint8_t x672 = _addcarryx_u32(0x0, x614, x635, &_);
+{ uint32_t x674; uint8_t x675 = _addcarryx_u32(x672, x617, x653, &x674);
+{ uint32_t x677; uint8_t x678 = _addcarryx_u32(x675, x620, x656, &x677);
+{ uint32_t x680; uint8_t x681 = _addcarryx_u32(x678, x623, x659, &x680);
+{ uint32_t x683; uint8_t x684 = _addcarryx_u32(x681, x626, x662, &x683);
+{ uint32_t x686; uint8_t x687 = _addcarryx_u32(x684, x629, x665, &x686);
+{ uint32_t x689; uint8_t x690 = _addcarryx_u32(x687, x632, x668, &x689);
+{ uint8_t x691 = (x690 + x633); // extra top word: sum of the two final carries
+{ uint32_t x693; uint8_t x694 = _subborrow_u32(0x0, x674, 0xffffffff, &x693); // trial subtraction of the same six constants from x674..x689
+{ uint32_t x696; uint8_t x697 = _subborrow_u32(x694, x677, 0xffffffff, &x696);
+{ uint32_t x699; uint8_t x700 = _subborrow_u32(x697, x680, 0xfffffffe, &x699);
+{ uint32_t x702; uint8_t x703 = _subborrow_u32(x700, x683, 0xffffffff, &x702);
+{ uint32_t x705; uint8_t x706 = _subborrow_u32(x703, x686, 0xffffffff, &x705);
+{ uint32_t x708; uint8_t x709 = _subborrow_u32(x706, x689, 0xffffffff, &x708);
+{ uint32_t _; uint8_t x712 = _subborrow_u32(x709, x691, 0x0, &_); // borrow chain continues through x691; only the final borrow x712 is kept
+{ uint32_t x713 = cmovznz(x712, x708, x689); // cmovznz is not defined in this file; presumably picks the unsubtracted limb when x712 != 0, else the subtracted one -- TODO confirm in liblow.h
+{ uint32_t x714 = cmovznz(x712, x705, x686);
+{ uint32_t x715 = cmovznz(x712, x702, x683);
+{ uint32_t x716 = cmovznz(x712, x699, x680);
+{ uint32_t x717 = cmovznz(x712, x696, x677);
+{ uint32_t x718 = cmovznz(x712, x693, x674);
+out[0] = x713; // out[0] holds the limb from the end of the carry chain, out[5] the limb from its start
+out[1] = x714;
+out[2] = x715;
+out[3] = x716;
+out[4] = x717;
+out[5] = x718;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e192m2e64m1/femul.h b/src/Specific/montgomery32_2e192m2e64m1/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery32_2e192m2e64m1/fenz.c b/src/Specific/montgomery32_2e192m2e64m1/fenz.c
new file mode 100644
index 000000000..6d8132b20
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Bitwise-ORs the six limb arguments into one word and stores it in out[0]; the result is zero only when the low 32 bits of every argument are zero.
+{ uint32_t x11 = (x10 | x9); // assigned to uint32_t, so only the low 32 bits of the OR are kept
+{ uint32_t x12 = (x8 | x11);
+{ uint32_t x13 = (x6 | x12);
+{ uint32_t x14 = (x4 | x13);
+{ uint32_t x15 = (x2 | x14);
+out[0] = x15; // the only output word; the uint32_t value is zero-extended into the uint64_t slot
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e192m2e64m1/fenz.h b/src/Specific/montgomery32_2e192m2e64m1/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e192m2e64m1/feopp.c b/src/Specific/montgomery32_2e192m2e64m1/feopp.c
new file mode 100644
index 000000000..c55b28253
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: subtracts the limbs x2,x4,x6,x8,x10,x9 from zero along a borrow chain, then adds back masked constants that match the 32-bit limbs of 2^192 - 2^64 - 1 (the modulus named by the directory).
+{ uint32_t x12; uint8_t x13 = _subborrow_u32(0x0, 0x0, x2, &x12); // x2 starts the borrow chain, x9 ends it
+{ uint32_t x15; uint8_t x16 = _subborrow_u32(x13, 0x0, x4, &x15);
+{ uint32_t x18; uint8_t x19 = _subborrow_u32(x16, 0x0, x6, &x18);
+{ uint32_t x21; uint8_t x22 = _subborrow_u32(x19, 0x0, x8, &x21);
+{ uint32_t x24; uint8_t x25 = _subborrow_u32(x22, 0x0, x10, &x24);
+{ uint32_t x27; uint8_t x28 = _subborrow_u32(x25, 0x0, x9, &x27);
+{ uint32_t x29 = (uint32_t)cmovznz(x28, 0x0, 0xffffffff); // cmovznz is not defined in this file; presumably yields 0xffffffff when the final borrow x28 is nonzero and 0 otherwise -- TODO confirm in liblow.h
+{ uint32_t x30 = (x29 & 0xffffffff); // each constant is ANDed with the mask x29 before being added back
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(0x0, x12, x30, &x32);
+{ uint32_t x34 = (x29 & 0xffffffff);
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x33, x15, x34, &x36);
+{ uint32_t x38 = (x29 & 0xfffffffe); // third limb constant is 0xfffffffe (the "- 2^64" term)
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x37, x18, x38, &x40);
+{ uint32_t x42 = (x29 & 0xffffffff);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x41, x21, x42, &x44);
+{ uint32_t x46 = (x29 & 0xffffffff);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x45, x24, x46, &x48);
+{ uint32_t x50 = (x29 & 0xffffffff);
+{ uint32_t x52; uint8_t _ = _addcarryx_u32(x49, x27, x50, &x52); // final carry-out goes to the throwaway `_`
+out[0] = x52; // out[0] holds the limb from the end of the carry chain, out[5] the limb from its start
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e192m2e64m1/feopp.h b/src/Specific/montgomery32_2e192m2e64m1/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e192m2e64m1/fesub.c b/src/Specific/montgomery32_2e192m2e64m1/fesub.c
new file mode 100644
index 000000000..252303502
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtraction: computes limbs (x5,x7,x9,x11,x13,x12) minus (x15,x17,x19,x21,x23,x22) along a borrow chain, then adds back masked constants that match the 32-bit limbs of 2^192 - 2^64 - 1 (the modulus named by the directory).
+{ uint32_t x25; uint8_t x26 = _subborrow_u32(0x0, x5, x15, &x25); // x5/x15 start the borrow chain, x12/x22 end it
+{ uint32_t x28; uint8_t x29 = _subborrow_u32(x26, x7, x17, &x28);
+{ uint32_t x31; uint8_t x32 = _subborrow_u32(x29, x9, x19, &x31);
+{ uint32_t x34; uint8_t x35 = _subborrow_u32(x32, x11, x21, &x34);
+{ uint32_t x37; uint8_t x38 = _subborrow_u32(x35, x13, x23, &x37);
+{ uint32_t x40; uint8_t x41 = _subborrow_u32(x38, x12, x22, &x40);
+{ uint32_t x42 = (uint32_t)cmovznz(x41, 0x0, 0xffffffff); // cmovznz is not defined in this file; presumably yields 0xffffffff when the final borrow x41 is nonzero and 0 otherwise -- TODO confirm in liblow.h
+{ uint32_t x43 = (x42 & 0xffffffff); // each constant is ANDed with the mask x42 before being added back
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x25, x43, &x45);
+{ uint32_t x47 = (x42 & 0xffffffff);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x46, x28, x47, &x49);
+{ uint32_t x51 = (x42 & 0xfffffffe); // third limb constant is 0xfffffffe (the "- 2^64" term)
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x50, x31, x51, &x53);
+{ uint32_t x55 = (x42 & 0xffffffff);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x54, x34, x55, &x57);
+{ uint32_t x59 = (x42 & 0xffffffff);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x58, x37, x59, &x61);
+{ uint32_t x63 = (x42 & 0xffffffff);
+{ uint32_t x65; uint8_t _ = _addcarryx_u32(x62, x40, x63, &x65); // final carry-out goes to the throwaway `_`
+out[0] = x65; // out[0] holds the limb from the end of the carry chain, out[5] the limb from its start
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery32_2e192m2e64m1/fesub.h b/src/Specific/montgomery32_2e192m2e64m1/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery32_2e192m2e64m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery32_2e194m33/feadd.c b/src/Specific/montgomery32_2e194m33/feadd.c
new file mode 100644
index 000000000..552319502
--- /dev/null
+++ b/src/Specific/montgomery32_2e194m33/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Addition: sums limbs (x5,x7,x9,x11,x13,x15,x14) and (x17,x19,x21,x23,x25,x27,x26) along a carry chain, trial-subtracts constants matching the 32-bit limbs of 2^194 - 33 (the modulus named by the directory), and writes one of the two results to out[0..6].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // x5/x17 start the carry chain, x14/x26 end it
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffdf, &x50); // 0xffffffdf = 2^32 - 33, the first limb constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3, &x68); // last limb constant is 0x3 (194 - 6*32 = 2 bits)
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // borrow chain continues through the addition's carry-out x48; only the final borrow x72 is kept
+{ uint32_t x73 = cmovznz(x72, x68, x47); // cmovznz is not defined in this file; presumably picks the unsubtracted sum limb when x72 != 0, else the subtracted one -- TODO confirm in liblow.h
+{ uint32_t x74 = cmovznz(x72, x65, x44);
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // out[0] holds the limb from the end of the carry chain, out[6] the limb from its start
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79;
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e194m33/feadd.h b/src/Specific/montgomery32_2e194m33/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e194m33/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17);
diff --git a/src/Specific/montgomery32_2e194m33/fenz.c b/src/Specific/montgomery32_2e194m33/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e194m33/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Bitwise-ORs the seven limb arguments into one word and stores it in out[0]; the result is zero only when the low 32 bits of every argument are zero.
+{ uint32_t x13 = (x12 | x11); // assigned to uint32_t, so only the low 32 bits of the OR are kept
+{ uint32_t x14 = (x10 | x13);
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17);
+out[0] = x18; // the only output word; the uint32_t value is zero-extended into the uint64_t slot
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e194m33/fenz.h b/src/Specific/montgomery32_2e194m33/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e194m33/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e196m15/feadd.c b/src/Specific/montgomery32_2e196m15/feadd.c
new file mode 100644
index 000000000..9814fd24f
--- /dev/null
+++ b/src/Specific/montgomery32_2e196m15/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Addition: sums limbs (x5,x7,x9,x11,x13,x15,x14) and (x17,x19,x21,x23,x25,x27,x26) along a carry chain, trial-subtracts constants matching the 32-bit limbs of 2^196 - 15 (the modulus named by the directory), and writes one of the two results to out[0..6].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // x5/x17 start the carry chain, x14/x26 end it
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47);
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffff1, &x50); // 0xfffffff1 = 2^32 - 15, the first limb constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xf, &x68); // last limb constant is 0xf (196 - 6*32 = 4 bits)
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // borrow chain continues through the addition's carry-out x48; only the final borrow x72 is kept
+{ uint32_t x73 = cmovznz(x72, x68, x47); // cmovznz is not defined in this file; presumably picks the unsubtracted sum limb when x72 != 0, else the subtracted one -- TODO confirm in liblow.h
+{ uint32_t x74 = cmovznz(x72, x65, x44);
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // out[0] holds the limb from the end of the carry chain, out[6] the limb from its start
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79;
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e196m15/feadd.h b/src/Specific/montgomery32_2e196m15/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e196m15/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype of feadd: out is the result buffer, followed by fourteen uint64_t limb arguments; the definition in this directory's feadd.c treats them as two 7-limb operands and writes out[0..6].
diff --git a/src/Specific/montgomery32_2e196m15/fenz.c b/src/Specific/montgomery32_2e196m15/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e196m15/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs together and stores the low 32 bits of the result in out[0]; out[0] is zero exactly when the low 32 bits of every input are zero.
+{ uint32_t x13 = (x12 | x11); // the OR is evaluated on uint64_t values, then narrowed to uint32_t by the initialization
+{ uint32_t x14 = (x10 | x13); // each step folds one more input into the running OR
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17); // x18 now covers all seven inputs
+out[0] = x18; // only one output word is written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e196m15/fenz.h b/src/Specific/montgomery32_2e196m15/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e196m15/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: out is the result buffer, followed by seven uint64_t arguments; the definition in this directory's fenz.c ORs them into out[0].
diff --git a/src/Specific/montgomery32_2e198m17/feadd.c b/src/Specific/montgomery32_2e198m17/feadd.c
new file mode 100644
index 000000000..95d09c821
--- /dev/null
+++ b/src/Specific/montgomery32_2e198m17/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb operands passed most-significant limb first (x14,x15,x13,x11,x9,x7,x5 and x26,x27,x25,x23,x21,x19,x17), also forms the sum minus 2^198 - 17, and writes one of the two 7-limb results to out[0..6] with the top limb in out[0].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least-significant limb pair, no carry-in; x30 is the carry out (the _u32 intrinsic takes 32-bit operands, so the uint64_t arguments are narrowed)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32); // every later limb pair consumes the carry of the previous one
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most-significant limb pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffef, &x50); // start of the borrow chain: lowest limb of the constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3f, &x68); // top limb of the constant; limbs 0x3f, 0xffffffff (x5), 0xffffffef together equal 2^198 - 17
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the addition's carry x48; the difference word is discarded, only the final borrow x72 is used
+{ uint32_t x73 = cmovznz(x72, x68, x47); // NOTE(review): cmovznz is declared in liblow.h (not shown); presumably yields the subtracted limb x68 when x72 == 0 and the raw sum limb x47 otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44); // same selection for each remaining limb, all keyed on x72
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most-significant result limb
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least-significant result limb
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e198m17/feadd.h b/src/Specific/montgomery32_2e198m17/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e198m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype of feadd: out is the result buffer, followed by fourteen uint64_t limb arguments; the definition in this directory's feadd.c treats them as two 7-limb operands and writes out[0..6].
diff --git a/src/Specific/montgomery32_2e198m17/fenz.c b/src/Specific/montgomery32_2e198m17/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e198m17/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs together and stores the low 32 bits of the result in out[0]; out[0] is zero exactly when the low 32 bits of every input are zero.
+{ uint32_t x13 = (x12 | x11); // the OR is evaluated on uint64_t values, then narrowed to uint32_t by the initialization
+{ uint32_t x14 = (x10 | x13); // each step folds one more input into the running OR
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17); // x18 now covers all seven inputs
+out[0] = x18; // only one output word is written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e198m17/fenz.h b/src/Specific/montgomery32_2e198m17/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e198m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: out is the result buffer, followed by seven uint64_t arguments; the definition in this directory's fenz.c ORs them into out[0].
diff --git a/src/Specific/montgomery32_2e205m45x2e198m1/feadd.c b/src/Specific/montgomery32_2e205m45x2e198m1/feadd.c
new file mode 100644
index 000000000..8233bfac7
--- /dev/null
+++ b/src/Specific/montgomery32_2e205m45x2e198m1/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb operands passed most-significant limb first (x14,x15,x13,x11,x9,x7,x5 and x26,x27,x25,x23,x21,x19,x17), also forms the sum minus 2^205 - 45*2^198 - 1, and writes one of the two 7-limb results to out[0..6] with the top limb in out[0].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least-significant limb pair, no carry-in; x30 is the carry out (the _u32 intrinsic takes 32-bit operands, so the uint64_t arguments are narrowed)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32); // every later limb pair consumes the carry of the previous one
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most-significant limb pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffff, &x50); // start of the borrow chain: lowest limb of the constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x14bf, &x68); // top limb of the constant; limbs 0x14bf, 0xffffffff (x6) together equal 2^205 - 45*2^198 - 1
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the addition's carry x48; the difference word is discarded, only the final borrow x72 is used
+{ uint32_t x73 = cmovznz(x72, x68, x47); // NOTE(review): cmovznz is declared in liblow.h (not shown); presumably yields the subtracted limb x68 when x72 == 0 and the raw sum limb x47 otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44); // same selection for each remaining limb, all keyed on x72
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most-significant result limb
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least-significant result limb
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e205m45x2e198m1/feadd.h b/src/Specific/montgomery32_2e205m45x2e198m1/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e205m45x2e198m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype of feadd: out is the result buffer, followed by fourteen uint64_t limb arguments; the definition in this directory's feadd.c treats them as two 7-limb operands and writes out[0..6].
diff --git a/src/Specific/montgomery32_2e205m45x2e198m1/fenz.c b/src/Specific/montgomery32_2e205m45x2e198m1/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e205m45x2e198m1/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs together and stores the low 32 bits of the result in out[0]; out[0] is zero exactly when the low 32 bits of every input are zero.
+{ uint32_t x13 = (x12 | x11); // the OR is evaluated on uint64_t values, then narrowed to uint32_t by the initialization
+{ uint32_t x14 = (x10 | x13); // each step folds one more input into the running OR
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17); // x18 now covers all seven inputs
+out[0] = x18; // only one output word is written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e205m45x2e198m1/fenz.h b/src/Specific/montgomery32_2e205m45x2e198m1/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e205m45x2e198m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: out is the result buffer, followed by seven uint64_t arguments; the definition in this directory's fenz.c ORs them into out[0].
diff --git a/src/Specific/montgomery32_2e206m5/feadd.c b/src/Specific/montgomery32_2e206m5/feadd.c
new file mode 100644
index 000000000..d2dbcd052
--- /dev/null
+++ b/src/Specific/montgomery32_2e206m5/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb operands passed most-significant limb first (x14,x15,x13,x11,x9,x7,x5 and x26,x27,x25,x23,x21,x19,x17), also forms the sum minus 2^206 - 5, and writes one of the two 7-limb results to out[0..6] with the top limb in out[0].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least-significant limb pair, no carry-in; x30 is the carry out (the _u32 intrinsic takes 32-bit operands, so the uint64_t arguments are narrowed)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32); // every later limb pair consumes the carry of the previous one
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most-significant limb pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffb, &x50); // start of the borrow chain: lowest limb of the constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3fff, &x68); // top limb of the constant; limbs 0x3fff, 0xffffffff (x5), 0xfffffffb together equal 2^206 - 5
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the addition's carry x48; the difference word is discarded, only the final borrow x72 is used
+{ uint32_t x73 = cmovznz(x72, x68, x47); // NOTE(review): cmovznz is declared in liblow.h (not shown); presumably yields the subtracted limb x68 when x72 == 0 and the raw sum limb x47 otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44); // same selection for each remaining limb, all keyed on x72
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most-significant result limb
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least-significant result limb
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e206m5/feadd.h b/src/Specific/montgomery32_2e206m5/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e206m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype of feadd: out is the result buffer, followed by fourteen uint64_t limb arguments; the definition in this directory's feadd.c treats them as two 7-limb operands and writes out[0..6].
diff --git a/src/Specific/montgomery32_2e206m5/fenz.c b/src/Specific/montgomery32_2e206m5/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e206m5/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs together and stores the low 32 bits of the result in out[0]; out[0] is zero exactly when the low 32 bits of every input are zero.
+{ uint32_t x13 = (x12 | x11); // the OR is evaluated on uint64_t values, then narrowed to uint32_t by the initialization
+{ uint32_t x14 = (x10 | x13); // each step folds one more input into the running OR
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17); // x18 now covers all seven inputs
+out[0] = x18; // only one output word is written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e206m5/fenz.h b/src/Specific/montgomery32_2e206m5/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e206m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: out is the result buffer, followed by seven uint64_t arguments; the definition in this directory's fenz.c ORs them into out[0].
diff --git a/src/Specific/montgomery32_2e212m29/feadd.c b/src/Specific/montgomery32_2e212m29/feadd.c
new file mode 100644
index 000000000..a5d7c69ae
--- /dev/null
+++ b/src/Specific/montgomery32_2e212m29/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb operands passed most-significant limb first (x14,x15,x13,x11,x9,x7,x5 and x26,x27,x25,x23,x21,x19,x17), also forms the sum minus 2^212 - 29, and writes one of the two 7-limb results to out[0..6] with the top limb in out[0].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least-significant limb pair, no carry-in; x30 is the carry out (the _u32 intrinsic takes 32-bit operands, so the uint64_t arguments are narrowed)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32); // every later limb pair consumes the carry of the previous one
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most-significant limb pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffe3, &x50); // start of the borrow chain: lowest limb of the constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xfffff, &x68); // top limb of the constant; limbs 0xfffff, 0xffffffff (x5), 0xffffffe3 together equal 2^212 - 29
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the addition's carry x48; the difference word is discarded, only the final borrow x72 is used
+{ uint32_t x73 = cmovznz(x72, x68, x47); // NOTE(review): cmovznz is declared in liblow.h (not shown); presumably yields the subtracted limb x68 when x72 == 0 and the raw sum limb x47 otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44); // same selection for each remaining limb, all keyed on x72
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most-significant result limb
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least-significant result limb
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e212m29/feadd.h b/src/Specific/montgomery32_2e212m29/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e212m29/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype of feadd: out is the result buffer, followed by fourteen uint64_t limb arguments; the definition in this directory's feadd.c treats them as two 7-limb operands and writes out[0..6].
diff --git a/src/Specific/montgomery32_2e212m29/fenz.c b/src/Specific/montgomery32_2e212m29/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e212m29/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs together and stores the low 32 bits of the result in out[0]; out[0] is zero exactly when the low 32 bits of every input are zero.
+{ uint32_t x13 = (x12 | x11); // the OR is evaluated on uint64_t values, then narrowed to uint32_t by the initialization
+{ uint32_t x14 = (x10 | x13); // each step folds one more input into the running OR
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17); // x18 now covers all seven inputs
+out[0] = x18; // only one output word is written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e212m29/fenz.h b/src/Specific/montgomery32_2e212m29/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e212m29/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: out is the result buffer, followed by seven uint64_t arguments; the definition in this directory's fenz.c ORs them into out[0].
diff --git a/src/Specific/montgomery32_2e213m3/feadd.c b/src/Specific/montgomery32_2e213m3/feadd.c
new file mode 100644
index 000000000..ae525568b
--- /dev/null
+++ b/src/Specific/montgomery32_2e213m3/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb operands passed most-significant limb first (x14,x15,x13,x11,x9,x7,x5 and x26,x27,x25,x23,x21,x19,x17), also forms the sum minus 2^213 - 3, and writes one of the two 7-limb results to out[0..6] with the top limb in out[0].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least-significant limb pair, no carry-in; x30 is the carry out (the _u32 intrinsic takes 32-bit operands, so the uint64_t arguments are narrowed)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32); // every later limb pair consumes the carry of the previous one
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most-significant limb pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffd, &x50); // start of the borrow chain: lowest limb of the constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x1fffff, &x68); // top limb of the constant; limbs 0x1fffff, 0xffffffff (x5), 0xfffffffd together equal 2^213 - 3
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the addition's carry x48; the difference word is discarded, only the final borrow x72 is used
+{ uint32_t x73 = cmovznz(x72, x68, x47); // NOTE(review): cmovznz is declared in liblow.h (not shown); presumably yields the subtracted limb x68 when x72 == 0 and the raw sum limb x47 otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44); // same selection for each remaining limb, all keyed on x72
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most-significant result limb
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least-significant result limb
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e213m3/feadd.h b/src/Specific/montgomery32_2e213m3/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e213m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype of feadd: out is the result buffer, followed by fourteen uint64_t limb arguments; the definition in this directory's feadd.c treats them as two 7-limb operands and writes out[0..6].
diff --git a/src/Specific/montgomery32_2e213m3/fenz.c b/src/Specific/montgomery32_2e213m3/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e213m3/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs together and stores the low 32 bits of the result in out[0]; out[0] is zero exactly when the low 32 bits of every input are zero.
+{ uint32_t x13 = (x12 | x11); // the OR is evaluated on uint64_t values, then narrowed to uint32_t by the initialization
+{ uint32_t x14 = (x10 | x13); // each step folds one more input into the running OR
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17); // x18 now covers all seven inputs
+out[0] = x18; // only one output word is written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e213m3/fenz.h b/src/Specific/montgomery32_2e213m3/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e213m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: out is the result buffer, followed by seven uint64_t arguments; the definition in this directory's fenz.c ORs them into out[0].
diff --git a/src/Specific/montgomery32_2e216m2e108m1/feadd.c b/src/Specific/montgomery32_2e216m2e108m1/feadd.c
new file mode 100644
index 000000000..b0e96bfa8
--- /dev/null
+++ b/src/Specific/montgomery32_2e216m2e108m1/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb operands passed most-significant limb first (x14,x15,x13,x11,x9,x7,x5 and x26,x27,x25,x23,x21,x19,x17), also forms the sum minus 2^216 - 2^108 - 1, and writes one of the two 7-limb results to out[0..6] with the top limb in out[0].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least-significant limb pair, no carry-in; x30 is the carry out (the _u32 intrinsic takes 32-bit operands, so the uint64_t arguments are narrowed)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32); // every later limb pair consumes the carry of the previous one
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most-significant limb pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffffff, &x50); // start of the borrow chain: lowest limb of the constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffefff, &x59); // fourth limb has bit 12 cleared, i.e. bit 108 of the whole constant (the - 2^108 term)
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xffffff, &x68); // top limb of the constant; all seven limbs together equal 2^216 - 2^108 - 1
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the addition's carry x48; the difference word is discarded, only the final borrow x72 is used
+{ uint32_t x73 = cmovznz(x72, x68, x47); // NOTE(review): cmovznz is declared in liblow.h (not shown); presumably yields the subtracted limb x68 when x72 == 0 and the raw sum limb x47 otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44); // same selection for each remaining limb, all keyed on x72
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most-significant result limb
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least-significant result limb
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e216m2e108m1/feadd.h b/src/Specific/montgomery32_2e216m2e108m1/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e216m2e108m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype of feadd: out is the result buffer, followed by fourteen uint64_t limb arguments; the definition in this directory's feadd.c treats them as two 7-limb operands and writes out[0..6].
diff --git a/src/Specific/montgomery32_2e216m2e108m1/fenz.c b/src/Specific/montgomery32_2e216m2e108m1/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e216m2e108m1/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs together and stores the low 32 bits of the result in out[0]; out[0] is zero exactly when the low 32 bits of every input are zero.
+{ uint32_t x13 = (x12 | x11); // the OR is evaluated on uint64_t values, then narrowed to uint32_t by the initialization
+{ uint32_t x14 = (x10 | x13); // each step folds one more input into the running OR
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17); // x18 now covers all seven inputs
+out[0] = x18; // only one output word is written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e216m2e108m1/fenz.h b/src/Specific/montgomery32_2e216m2e108m1/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e216m2e108m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: out is the result buffer, followed by seven uint64_t arguments; the definition in this directory's fenz.c ORs them into out[0].
diff --git a/src/Specific/montgomery32_2e221m3/feadd.c b/src/Specific/montgomery32_2e221m3/feadd.c
new file mode 100644
index 000000000..d43caf856
--- /dev/null
+++ b/src/Specific/montgomery32_2e221m3/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb operands passed most-significant limb first (x14,x15,x13,x11,x9,x7,x5 and x26,x27,x25,x23,x21,x19,x17), also forms the sum minus 2^221 - 3, and writes one of the two 7-limb results to out[0..6] with the top limb in out[0].
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least-significant limb pair, no carry-in; x30 is the carry out (the _u32 intrinsic takes 32-bit operands, so the uint64_t arguments are narrowed)
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32); // every later limb pair consumes the carry of the previous one
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most-significant limb pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xfffffffd, &x50); // start of the borrow chain: lowest limb of the constant
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x1fffffff, &x68); // top limb of the constant; limbs 0x1fffffff, 0xffffffff (x5), 0xfffffffd together equal 2^221 - 3
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the addition's carry x48; the difference word is discarded, only the final borrow x72 is used
+{ uint32_t x73 = cmovznz(x72, x68, x47); // NOTE(review): cmovznz is declared in liblow.h (not shown); presumably yields the subtracted limb x68 when x72 == 0 and the raw sum limb x47 otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44); // same selection for each remaining limb, all keyed on x72
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most-significant result limb
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least-significant result limb
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e221m3/feadd.h b/src/Specific/montgomery32_2e221m3/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e221m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype only; the matching definition writes out[0]..out[6], so out needs room for 7 words.
diff --git a/src/Specific/montgomery32_2e221m3/fenz.c b/src/Specific/montgomery32_2e221m3/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e221m3/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs into one 32-bit word and stores it in out[0]; that word is nonzero exactly when some input has a nonzero bit in its low 32 bits.
+{ uint32_t x13 = (x12 | x11); // each step narrows the running OR to uint32_t, so bits 32..63 of the inputs are dropped
+{ uint32_t x14 = (x10 | x13);
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17);
+out[0] = x18; // the only word written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e221m3/fenz.h b/src/Specific/montgomery32_2e221m3/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e221m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the matching definition stores a single word in out[0].
diff --git a/src/Specific/montgomery32_2e222m117/feadd.c b/src/Specific/montgomery32_2e222m117/feadd.c
new file mode 100644
index 000000000..ef4baa59b
--- /dev/null
+++ b/src/Specific/montgomery32_2e222m117/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb values (x5/x17 is the least significant pair, x14/x26 the most significant), subtracts the constant 2^222 - 117 from the sum, and passes the final borrow to cmovznz to pick each output word; writes out[0]..out[6]. The intrinsics take 32-bit operands, so only the low 32 bits of each argument take part.
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least significant pair, carry-in 0; the carry-out feeds the next limb
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most significant pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0xffffff8b, &x50); // start of the borrow chain: lowest limb of 2^222 - 117 (0xffffff8b = 2^32 - 117)
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0xffffffff, &x53);
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0xffffffff, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59);
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0x3fffffff, &x68); // highest limb of the constant (30 set bits)
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the carry x48; only the final borrow x72 is kept
+{ uint32_t x73 = cmovznz(x72, x68, x47); // cmovznz is not defined in this file (presumably liblow.h); presumably yields x47 (raw sum) when x72 is nonzero and x68 (sum minus constant) otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44);
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most significant word first, mirroring the argument order
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least significant word
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e222m117/feadd.h b/src/Specific/montgomery32_2e222m117/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e222m117/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype only; the matching definition writes out[0]..out[6], so out needs room for 7 words.
diff --git a/src/Specific/montgomery32_2e222m117/fenz.c b/src/Specific/montgomery32_2e222m117/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e222m117/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven inputs into one 32-bit word and stores it in out[0]; that word is nonzero exactly when some input has a nonzero bit in its low 32 bits.
+{ uint32_t x13 = (x12 | x11); // each step narrows the running OR to uint32_t, so bits 32..63 of the inputs are dropped
+{ uint32_t x14 = (x10 | x13);
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17);
+out[0] = x18; // the only word written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e222m117/fenz.h b/src/Specific/montgomery32_2e222m117/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e222m117/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the matching definition stores a single word in out[0].
diff --git a/src/Specific/montgomery32_2e224m2e96p1/feadd.c b/src/Specific/montgomery32_2e224m2e96p1/feadd.c
new file mode 100644
index 000000000..8f088ce7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e224m2e96p1/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Adds two 7-limb values (x5/x17 is the least significant pair, x14/x26 the most significant), subtracts the constant 2^224 - 2^96 + 1 from the sum, and passes the final borrow to cmovznz to pick each output word; writes out[0]..out[6]. The intrinsics take 32-bit operands, so only the low 32 bits of each argument take part.
+{ uint32_t x29; uint8_t x30 = _addcarryx_u32(0x0, x5, x17, &x29); // least significant pair, carry-in 0; the carry-out feeds the next limb
+{ uint32_t x32; uint8_t x33 = _addcarryx_u32(x30, x7, x19, &x32);
+{ uint32_t x35; uint8_t x36 = _addcarryx_u32(x33, x9, x21, &x35);
+{ uint32_t x38; uint8_t x39 = _addcarryx_u32(x36, x11, x23, &x38);
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(x39, x13, x25, &x41);
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x15, x27, &x44);
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x14, x26, &x47); // most significant pair; x48 is the carry out of the whole sum
+{ uint32_t x50; uint8_t x51 = _subborrow_u32(0x0, x29, 0x1, &x50); // start of the borrow chain: lowest limb of 2^224 - 2^96 + 1
+{ uint32_t x53; uint8_t x54 = _subborrow_u32(x51, x32, 0x0, &x53); // the next two limbs of the constant are zero; only the borrow propagates
+{ uint32_t x56; uint8_t x57 = _subborrow_u32(x54, x35, 0x0, &x56);
+{ uint32_t x59; uint8_t x60 = _subborrow_u32(x57, x38, 0xffffffff, &x59); // the upper four limbs of the constant are all ones
+{ uint32_t x62; uint8_t x63 = _subborrow_u32(x60, x41, 0xffffffff, &x62);
+{ uint32_t x65; uint8_t x66 = _subborrow_u32(x63, x44, 0xffffffff, &x65);
+{ uint32_t x68; uint8_t x69 = _subborrow_u32(x66, x47, 0xffffffff, &x68);
+{ uint32_t _; uint8_t x72 = _subborrow_u32(x69, x48, 0x0, &_); // extends the borrow chain over the carry x48; only the final borrow x72 is kept
+{ uint32_t x73 = cmovznz(x72, x68, x47); // cmovznz is not defined in this file (presumably liblow.h); presumably yields x47 (raw sum) when x72 is nonzero and x68 (sum minus constant) otherwise - confirm
+{ uint32_t x74 = cmovznz(x72, x65, x44);
+{ uint32_t x75 = cmovznz(x72, x62, x41);
+{ uint32_t x76 = cmovznz(x72, x59, x38);
+{ uint32_t x77 = cmovznz(x72, x56, x35);
+{ uint32_t x78 = cmovznz(x72, x53, x32);
+{ uint32_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // most significant word first, mirroring the argument order
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // least significant word
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e224m2e96p1/feadd.h b/src/Specific/montgomery32_2e224m2e96p1/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery32_2e224m2e96p1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype only; the matching definition writes out[0]..out[6], so out needs room for 7 words.
diff --git a/src/Specific/montgomery32_2e224m2e96p1/femul.c b/src/Specific/montgomery32_2e224m2e96p1/femul.c
new file mode 100644
index 000000000..185a40c24
--- /dev/null
+++ b/src/Specific/montgomery32_2e224m2e96p1/femul.c
@@ -0,0 +1,328 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
+{ uint32_t x30; uint32_t x29 = _mulx_u32(x5, x17, &x30);
+{ uint32_t x33; uint32_t x32 = _mulx_u32(x5, x19, &x33);
+{ uint32_t x36; uint32_t x35 = _mulx_u32(x5, x21, &x36);
+{ uint32_t x39; uint32_t x38 = _mulx_u32(x5, x23, &x39);
+{ uint32_t x42; uint32_t x41 = _mulx_u32(x5, x25, &x42);
+{ uint32_t x45; uint32_t x44 = _mulx_u32(x5, x27, &x45);
+{ uint32_t x48; uint32_t x47 = _mulx_u32(x5, x26, &x48);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(0x0, x30, x32, &x50);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x51, x33, x35, &x53);
+{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x36, x38, &x56);
+{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x39, x41, &x59);
+{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x42, x44, &x62);
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x45, x47, &x65);
+{ uint32_t x68; uint8_t _ = _addcarryx_u32(0x0, x66, x48, &x68);
+{ uint32_t _; uint32_t x71 = _mulx_u32(x29, 0xffffffff, &_);
+{ uint32_t x75; uint32_t x74 = _mulx_u32(x71, 0xffffffff, &x75);
+{ uint32_t x78; uint32_t x77 = _mulx_u32(x71, 0xffffffff, &x78);
+{ uint32_t x81; uint32_t x80 = _mulx_u32(x71, 0xffffffff, &x81);
+{ uint32_t x84; uint32_t x83 = _mulx_u32(x71, 0xffffffff, &x84);
+{ uint8_t x85 = (0x0 + 0x0);
+{ uint8_t x86 = (0x0 + 0x0);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(0x0, x75, x77, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x78, x80, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x81, x83, &x94);
+{ uint32_t x97; uint8_t _ = _addcarryx_u32(0x0, x95, x84, &x97);
+{ uint32_t _; uint8_t x101 = _addcarryx_u32(0x0, x29, x71, &_);
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x50, x85, &x103);
+{ uint32_t x106; uint8_t x107 = _addcarryx_u32(x104, x53, x86, &x106);
+{ uint32_t x109; uint8_t x110 = _addcarryx_u32(x107, x56, x74, &x109);
+{ uint32_t x112; uint8_t x113 = _addcarryx_u32(x110, x59, x88, &x112);
+{ uint32_t x115; uint8_t x116 = _addcarryx_u32(x113, x62, x91, &x115);
+{ uint32_t x118; uint8_t x119 = _addcarryx_u32(x116, x65, x94, &x118);
+{ uint32_t x121; uint8_t x122 = _addcarryx_u32(x119, x68, x97, &x121);
+{ uint32_t x125; uint32_t x124 = _mulx_u32(x7, x17, &x125);
+{ uint32_t x128; uint32_t x127 = _mulx_u32(x7, x19, &x128);
+{ uint32_t x131; uint32_t x130 = _mulx_u32(x7, x21, &x131);
+{ uint32_t x134; uint32_t x133 = _mulx_u32(x7, x23, &x134);
+{ uint32_t x137; uint32_t x136 = _mulx_u32(x7, x25, &x137);
+{ uint32_t x140; uint32_t x139 = _mulx_u32(x7, x27, &x140);
+{ uint32_t x143; uint32_t x142 = _mulx_u32(x7, x26, &x143);
+{ uint32_t x145; uint8_t x146 = _addcarryx_u32(0x0, x125, x127, &x145);
+{ uint32_t x148; uint8_t x149 = _addcarryx_u32(x146, x128, x130, &x148);
+{ uint32_t x151; uint8_t x152 = _addcarryx_u32(x149, x131, x133, &x151);
+{ uint32_t x154; uint8_t x155 = _addcarryx_u32(x152, x134, x136, &x154);
+{ uint32_t x157; uint8_t x158 = _addcarryx_u32(x155, x137, x139, &x157);
+{ uint32_t x160; uint8_t x161 = _addcarryx_u32(x158, x140, x142, &x160);
+{ uint32_t x163; uint8_t _ = _addcarryx_u32(0x0, x161, x143, &x163);
+{ uint32_t x166; uint8_t x167 = _addcarryx_u32(0x0, x103, x124, &x166);
+{ uint32_t x169; uint8_t x170 = _addcarryx_u32(x167, x106, x145, &x169);
+{ uint32_t x172; uint8_t x173 = _addcarryx_u32(x170, x109, x148, &x172);
+{ uint32_t x175; uint8_t x176 = _addcarryx_u32(x173, x112, x151, &x175);
+{ uint32_t x178; uint8_t x179 = _addcarryx_u32(x176, x115, x154, &x178);
+{ uint32_t x181; uint8_t x182 = _addcarryx_u32(x179, x118, x157, &x181);
+{ uint32_t x184; uint8_t x185 = _addcarryx_u32(x182, x121, x160, &x184);
+{ uint32_t x187; uint8_t x188 = _addcarryx_u32(x185, x122, x163, &x187);
+{ uint32_t _; uint32_t x190 = _mulx_u32(x166, 0xffffffff, &_);
+{ uint32_t x194; uint32_t x193 = _mulx_u32(x190, 0xffffffff, &x194);
+{ uint32_t x197; uint32_t x196 = _mulx_u32(x190, 0xffffffff, &x197);
+{ uint32_t x200; uint32_t x199 = _mulx_u32(x190, 0xffffffff, &x200);
+{ uint32_t x203; uint32_t x202 = _mulx_u32(x190, 0xffffffff, &x203);
+{ uint8_t x204 = (0x0 + 0x0);
+{ uint8_t x205 = (0x0 + 0x0);
+{ uint32_t x207; uint8_t x208 = _addcarryx_u32(0x0, x194, x196, &x207);
+{ uint32_t x210; uint8_t x211 = _addcarryx_u32(x208, x197, x199, &x210);
+{ uint32_t x213; uint8_t x214 = _addcarryx_u32(x211, x200, x202, &x213);
+{ uint32_t x216; uint8_t _ = _addcarryx_u32(0x0, x214, x203, &x216);
+{ uint32_t _; uint8_t x220 = _addcarryx_u32(0x0, x166, x190, &_);
+{ uint32_t x222; uint8_t x223 = _addcarryx_u32(x220, x169, x204, &x222);
+{ uint32_t x225; uint8_t x226 = _addcarryx_u32(x223, x172, x205, &x225);
+{ uint32_t x228; uint8_t x229 = _addcarryx_u32(x226, x175, x193, &x228);
+{ uint32_t x231; uint8_t x232 = _addcarryx_u32(x229, x178, x207, &x231);
+{ uint32_t x234; uint8_t x235 = _addcarryx_u32(x232, x181, x210, &x234);
+{ uint32_t x237; uint8_t x238 = _addcarryx_u32(x235, x184, x213, &x237);
+{ uint32_t x240; uint8_t x241 = _addcarryx_u32(x238, x187, x216, &x240);
+{ uint8_t x242 = (x241 + x188);
+{ uint32_t x245; uint32_t x244 = _mulx_u32(x9, x17, &x245);
+{ uint32_t x248; uint32_t x247 = _mulx_u32(x9, x19, &x248);
+{ uint32_t x251; uint32_t x250 = _mulx_u32(x9, x21, &x251);
+{ uint32_t x254; uint32_t x253 = _mulx_u32(x9, x23, &x254);
+{ uint32_t x257; uint32_t x256 = _mulx_u32(x9, x25, &x257);
+{ uint32_t x260; uint32_t x259 = _mulx_u32(x9, x27, &x260);
+{ uint32_t x263; uint32_t x262 = _mulx_u32(x9, x26, &x263);
+{ uint32_t x265; uint8_t x266 = _addcarryx_u32(0x0, x245, x247, &x265);
+{ uint32_t x268; uint8_t x269 = _addcarryx_u32(x266, x248, x250, &x268);
+{ uint32_t x271; uint8_t x272 = _addcarryx_u32(x269, x251, x253, &x271);
+{ uint32_t x274; uint8_t x275 = _addcarryx_u32(x272, x254, x256, &x274);
+{ uint32_t x277; uint8_t x278 = _addcarryx_u32(x275, x257, x259, &x277);
+{ uint32_t x280; uint8_t x281 = _addcarryx_u32(x278, x260, x262, &x280);
+{ uint32_t x283; uint8_t _ = _addcarryx_u32(0x0, x281, x263, &x283);
+{ uint32_t x286; uint8_t x287 = _addcarryx_u32(0x0, x222, x244, &x286);
+{ uint32_t x289; uint8_t x290 = _addcarryx_u32(x287, x225, x265, &x289);
+{ uint32_t x292; uint8_t x293 = _addcarryx_u32(x290, x228, x268, &x292);
+{ uint32_t x295; uint8_t x296 = _addcarryx_u32(x293, x231, x271, &x295);
+{ uint32_t x298; uint8_t x299 = _addcarryx_u32(x296, x234, x274, &x298);
+{ uint32_t x301; uint8_t x302 = _addcarryx_u32(x299, x237, x277, &x301);
+{ uint32_t x304; uint8_t x305 = _addcarryx_u32(x302, x240, x280, &x304);
+{ uint32_t x307; uint8_t x308 = _addcarryx_u32(x305, x242, x283, &x307);
+{ uint32_t _; uint32_t x310 = _mulx_u32(x286, 0xffffffff, &_);
+{ uint32_t x314; uint32_t x313 = _mulx_u32(x310, 0xffffffff, &x314);
+{ uint32_t x317; uint32_t x316 = _mulx_u32(x310, 0xffffffff, &x317);
+{ uint32_t x320; uint32_t x319 = _mulx_u32(x310, 0xffffffff, &x320);
+{ uint32_t x323; uint32_t x322 = _mulx_u32(x310, 0xffffffff, &x323);
+{ uint8_t x324 = (0x0 + 0x0);
+{ uint8_t x325 = (0x0 + 0x0);
+{ uint32_t x327; uint8_t x328 = _addcarryx_u32(0x0, x314, x316, &x327);
+{ uint32_t x330; uint8_t x331 = _addcarryx_u32(x328, x317, x319, &x330);
+{ uint32_t x333; uint8_t x334 = _addcarryx_u32(x331, x320, x322, &x333);
+{ uint32_t x336; uint8_t _ = _addcarryx_u32(0x0, x334, x323, &x336);
+{ uint32_t _; uint8_t x340 = _addcarryx_u32(0x0, x286, x310, &_);
+{ uint32_t x342; uint8_t x343 = _addcarryx_u32(x340, x289, x324, &x342);
+{ uint32_t x345; uint8_t x346 = _addcarryx_u32(x343, x292, x325, &x345);
+{ uint32_t x348; uint8_t x349 = _addcarryx_u32(x346, x295, x313, &x348);
+{ uint32_t x351; uint8_t x352 = _addcarryx_u32(x349, x298, x327, &x351);
+{ uint32_t x354; uint8_t x355 = _addcarryx_u32(x352, x301, x330, &x354);
+{ uint32_t x357; uint8_t x358 = _addcarryx_u32(x355, x304, x333, &x357);
+{ uint32_t x360; uint8_t x361 = _addcarryx_u32(x358, x307, x336, &x360);
+{ uint8_t x362 = (x361 + x308);
+{ uint32_t x365; uint32_t x364 = _mulx_u32(x11, x17, &x365);
+{ uint32_t x368; uint32_t x367 = _mulx_u32(x11, x19, &x368);
+{ uint32_t x371; uint32_t x370 = _mulx_u32(x11, x21, &x371);
+{ uint32_t x374; uint32_t x373 = _mulx_u32(x11, x23, &x374);
+{ uint32_t x377; uint32_t x376 = _mulx_u32(x11, x25, &x377);
+{ uint32_t x380; uint32_t x379 = _mulx_u32(x11, x27, &x380);
+{ uint32_t x383; uint32_t x382 = _mulx_u32(x11, x26, &x383);
+{ uint32_t x385; uint8_t x386 = _addcarryx_u32(0x0, x365, x367, &x385);
+{ uint32_t x388; uint8_t x389 = _addcarryx_u32(x386, x368, x370, &x388);
+{ uint32_t x391; uint8_t x392 = _addcarryx_u32(x389, x371, x373, &x391);
+{ uint32_t x394; uint8_t x395 = _addcarryx_u32(x392, x374, x376, &x394);
+{ uint32_t x397; uint8_t x398 = _addcarryx_u32(x395, x377, x379, &x397);
+{ uint32_t x400; uint8_t x401 = _addcarryx_u32(x398, x380, x382, &x400);
+{ uint32_t x403; uint8_t _ = _addcarryx_u32(0x0, x401, x383, &x403);
+{ uint32_t x406; uint8_t x407 = _addcarryx_u32(0x0, x342, x364, &x406);
+{ uint32_t x409; uint8_t x410 = _addcarryx_u32(x407, x345, x385, &x409);
+{ uint32_t x412; uint8_t x413 = _addcarryx_u32(x410, x348, x388, &x412);
+{ uint32_t x415; uint8_t x416 = _addcarryx_u32(x413, x351, x391, &x415);
+{ uint32_t x418; uint8_t x419 = _addcarryx_u32(x416, x354, x394, &x418);
+{ uint32_t x421; uint8_t x422 = _addcarryx_u32(x419, x357, x397, &x421);
+{ uint32_t x424; uint8_t x425 = _addcarryx_u32(x422, x360, x400, &x424);
+{ uint32_t x427; uint8_t x428 = _addcarryx_u32(x425, x362, x403, &x427);
+{ uint32_t _; uint32_t x430 = _mulx_u32(x406, 0xffffffff, &_);
+{ uint32_t x434; uint32_t x433 = _mulx_u32(x430, 0xffffffff, &x434);
+{ uint32_t x437; uint32_t x436 = _mulx_u32(x430, 0xffffffff, &x437);
+{ uint32_t x440; uint32_t x439 = _mulx_u32(x430, 0xffffffff, &x440);
+{ uint32_t x443; uint32_t x442 = _mulx_u32(x430, 0xffffffff, &x443);
+{ uint8_t x444 = (0x0 + 0x0);
+{ uint8_t x445 = (0x0 + 0x0);
+{ uint32_t x447; uint8_t x448 = _addcarryx_u32(0x0, x434, x436, &x447);
+{ uint32_t x450; uint8_t x451 = _addcarryx_u32(x448, x437, x439, &x450);
+{ uint32_t x453; uint8_t x454 = _addcarryx_u32(x451, x440, x442, &x453);
+{ uint32_t x456; uint8_t _ = _addcarryx_u32(0x0, x454, x443, &x456);
+{ uint32_t _; uint8_t x460 = _addcarryx_u32(0x0, x406, x430, &_);
+{ uint32_t x462; uint8_t x463 = _addcarryx_u32(x460, x409, x444, &x462);
+{ uint32_t x465; uint8_t x466 = _addcarryx_u32(x463, x412, x445, &x465);
+{ uint32_t x468; uint8_t x469 = _addcarryx_u32(x466, x415, x433, &x468);
+{ uint32_t x471; uint8_t x472 = _addcarryx_u32(x469, x418, x447, &x471);
+{ uint32_t x474; uint8_t x475 = _addcarryx_u32(x472, x421, x450, &x474);
+{ uint32_t x477; uint8_t x478 = _addcarryx_u32(x475, x424, x453, &x477);
+{ uint32_t x480; uint8_t x481 = _addcarryx_u32(x478, x427, x456, &x480);
+{ uint8_t x482 = (x481 + x428);
+{ uint32_t x485; uint32_t x484 = _mulx_u32(x13, x17, &x485);
+{ uint32_t x488; uint32_t x487 = _mulx_u32(x13, x19, &x488);
+{ uint32_t x491; uint32_t x490 = _mulx_u32(x13, x21, &x491);
+{ uint32_t x494; uint32_t x493 = _mulx_u32(x13, x23, &x494);
+{ uint32_t x497; uint32_t x496 = _mulx_u32(x13, x25, &x497);
+{ uint32_t x500; uint32_t x499 = _mulx_u32(x13, x27, &x500);
+{ uint32_t x503; uint32_t x502 = _mulx_u32(x13, x26, &x503);
+{ uint32_t x505; uint8_t x506 = _addcarryx_u32(0x0, x485, x487, &x505);
+{ uint32_t x508; uint8_t x509 = _addcarryx_u32(x506, x488, x490, &x508);
+{ uint32_t x511; uint8_t x512 = _addcarryx_u32(x509, x491, x493, &x511);
+{ uint32_t x514; uint8_t x515 = _addcarryx_u32(x512, x494, x496, &x514);
+{ uint32_t x517; uint8_t x518 = _addcarryx_u32(x515, x497, x499, &x517);
+{ uint32_t x520; uint8_t x521 = _addcarryx_u32(x518, x500, x502, &x520);
+{ uint32_t x523; uint8_t _ = _addcarryx_u32(0x0, x521, x503, &x523);
+{ uint32_t x526; uint8_t x527 = _addcarryx_u32(0x0, x462, x484, &x526);
+{ uint32_t x529; uint8_t x530 = _addcarryx_u32(x527, x465, x505, &x529);
+{ uint32_t x532; uint8_t x533 = _addcarryx_u32(x530, x468, x508, &x532);
+{ uint32_t x535; uint8_t x536 = _addcarryx_u32(x533, x471, x511, &x535);
+{ uint32_t x538; uint8_t x539 = _addcarryx_u32(x536, x474, x514, &x538);
+{ uint32_t x541; uint8_t x542 = _addcarryx_u32(x539, x477, x517, &x541);
+{ uint32_t x544; uint8_t x545 = _addcarryx_u32(x542, x480, x520, &x544);
+{ uint32_t x547; uint8_t x548 = _addcarryx_u32(x545, x482, x523, &x547);
+{ uint32_t _; uint32_t x550 = _mulx_u32(x526, 0xffffffff, &_);
+{ uint32_t x554; uint32_t x553 = _mulx_u32(x550, 0xffffffff, &x554);
+{ uint32_t x557; uint32_t x556 = _mulx_u32(x550, 0xffffffff, &x557);
+{ uint32_t x560; uint32_t x559 = _mulx_u32(x550, 0xffffffff, &x560);
+{ uint32_t x563; uint32_t x562 = _mulx_u32(x550, 0xffffffff, &x563);
+{ uint8_t x564 = (0x0 + 0x0);
+{ uint8_t x565 = (0x0 + 0x0);
+{ uint32_t x567; uint8_t x568 = _addcarryx_u32(0x0, x554, x556, &x567);
+{ uint32_t x570; uint8_t x571 = _addcarryx_u32(x568, x557, x559, &x570);
+{ uint32_t x573; uint8_t x574 = _addcarryx_u32(x571, x560, x562, &x573);
+{ uint32_t x576; uint8_t _ = _addcarryx_u32(0x0, x574, x563, &x576);
+{ uint32_t _; uint8_t x580 = _addcarryx_u32(0x0, x526, x550, &_);
+{ uint32_t x582; uint8_t x583 = _addcarryx_u32(x580, x529, x564, &x582);
+{ uint32_t x585; uint8_t x586 = _addcarryx_u32(x583, x532, x565, &x585);
+{ uint32_t x588; uint8_t x589 = _addcarryx_u32(x586, x535, x553, &x588);
+{ uint32_t x591; uint8_t x592 = _addcarryx_u32(x589, x538, x567, &x591);
+{ uint32_t x594; uint8_t x595 = _addcarryx_u32(x592, x541, x570, &x594);
+{ uint32_t x597; uint8_t x598 = _addcarryx_u32(x595, x544, x573, &x597);
+{ uint32_t x600; uint8_t x601 = _addcarryx_u32(x598, x547, x576, &x600);
+{ uint8_t x602 = (x601 + x548);
+{ uint32_t x605; uint32_t x604 = _mulx_u32(x15, x17, &x605);
+{ uint32_t x608; uint32_t x607 = _mulx_u32(x15, x19, &x608);
+{ uint32_t x611; uint32_t x610 = _mulx_u32(x15, x21, &x611);
+{ uint32_t x614; uint32_t x613 = _mulx_u32(x15, x23, &x614);
+{ uint32_t x617; uint32_t x616 = _mulx_u32(x15, x25, &x617);
+{ uint32_t x620; uint32_t x619 = _mulx_u32(x15, x27, &x620);
+{ uint32_t x623; uint32_t x622 = _mulx_u32(x15, x26, &x623);
+{ uint32_t x625; uint8_t x626 = _addcarryx_u32(0x0, x605, x607, &x625);
+{ uint32_t x628; uint8_t x629 = _addcarryx_u32(x626, x608, x610, &x628);
+{ uint32_t x631; uint8_t x632 = _addcarryx_u32(x629, x611, x613, &x631);
+{ uint32_t x634; uint8_t x635 = _addcarryx_u32(x632, x614, x616, &x634);
+{ uint32_t x637; uint8_t x638 = _addcarryx_u32(x635, x617, x619, &x637);
+{ uint32_t x640; uint8_t x641 = _addcarryx_u32(x638, x620, x622, &x640);
+{ uint32_t x643; uint8_t _ = _addcarryx_u32(0x0, x641, x623, &x643);
+{ uint32_t x646; uint8_t x647 = _addcarryx_u32(0x0, x582, x604, &x646);
+{ uint32_t x649; uint8_t x650 = _addcarryx_u32(x647, x585, x625, &x649);
+{ uint32_t x652; uint8_t x653 = _addcarryx_u32(x650, x588, x628, &x652);
+{ uint32_t x655; uint8_t x656 = _addcarryx_u32(x653, x591, x631, &x655);
+{ uint32_t x658; uint8_t x659 = _addcarryx_u32(x656, x594, x634, &x658);
+{ uint32_t x661; uint8_t x662 = _addcarryx_u32(x659, x597, x637, &x661);
+{ uint32_t x664; uint8_t x665 = _addcarryx_u32(x662, x600, x640, &x664);
+{ uint32_t x667; uint8_t x668 = _addcarryx_u32(x665, x602, x643, &x667);
+{ uint32_t _; uint32_t x670 = _mulx_u32(x646, 0xffffffff, &_);
+{ uint32_t x674; uint32_t x673 = _mulx_u32(x670, 0xffffffff, &x674);
+{ uint32_t x677; uint32_t x676 = _mulx_u32(x670, 0xffffffff, &x677);
+{ uint32_t x680; uint32_t x679 = _mulx_u32(x670, 0xffffffff, &x680);
+{ uint32_t x683; uint32_t x682 = _mulx_u32(x670, 0xffffffff, &x683);
+{ uint8_t x684 = (0x0 + 0x0);
+{ uint8_t x685 = (0x0 + 0x0);
+{ uint32_t x687; uint8_t x688 = _addcarryx_u32(0x0, x674, x676, &x687);
+{ uint32_t x690; uint8_t x691 = _addcarryx_u32(x688, x677, x679, &x690);
+{ uint32_t x693; uint8_t x694 = _addcarryx_u32(x691, x680, x682, &x693);
+{ uint32_t x696; uint8_t _ = _addcarryx_u32(0x0, x694, x683, &x696);
+{ uint32_t _; uint8_t x700 = _addcarryx_u32(0x0, x646, x670, &_);
+{ uint32_t x702; uint8_t x703 = _addcarryx_u32(x700, x649, x684, &x702);
+{ uint32_t x705; uint8_t x706 = _addcarryx_u32(x703, x652, x685, &x705);
+{ uint32_t x708; uint8_t x709 = _addcarryx_u32(x706, x655, x673, &x708);
+{ uint32_t x711; uint8_t x712 = _addcarryx_u32(x709, x658, x687, &x711);
+{ uint32_t x714; uint8_t x715 = _addcarryx_u32(x712, x661, x690, &x714);
+{ uint32_t x717; uint8_t x718 = _addcarryx_u32(x715, x664, x693, &x717);
+{ uint32_t x720; uint8_t x721 = _addcarryx_u32(x718, x667, x696, &x720);
+{ uint8_t x722 = (x721 + x668);
+{ uint32_t x725; uint32_t x724 = _mulx_u32(x14, x17, &x725);
+{ uint32_t x728; uint32_t x727 = _mulx_u32(x14, x19, &x728);
+{ uint32_t x731; uint32_t x730 = _mulx_u32(x14, x21, &x731);
+{ uint32_t x734; uint32_t x733 = _mulx_u32(x14, x23, &x734);
+{ uint32_t x737; uint32_t x736 = _mulx_u32(x14, x25, &x737);
+{ uint32_t x740; uint32_t x739 = _mulx_u32(x14, x27, &x740);
+{ uint32_t x743; uint32_t x742 = _mulx_u32(x14, x26, &x743);
+{ uint32_t x745; uint8_t x746 = _addcarryx_u32(0x0, x725, x727, &x745);
+{ uint32_t x748; uint8_t x749 = _addcarryx_u32(x746, x728, x730, &x748);
+{ uint32_t x751; uint8_t x752 = _addcarryx_u32(x749, x731, x733, &x751);
+{ uint32_t x754; uint8_t x755 = _addcarryx_u32(x752, x734, x736, &x754);
+{ uint32_t x757; uint8_t x758 = _addcarryx_u32(x755, x737, x739, &x757);
+{ uint32_t x760; uint8_t x761 = _addcarryx_u32(x758, x740, x742, &x760);
+{ uint32_t x763; uint8_t _ = _addcarryx_u32(0x0, x761, x743, &x763);
+{ uint32_t x766; uint8_t x767 = _addcarryx_u32(0x0, x702, x724, &x766);
+{ uint32_t x769; uint8_t x770 = _addcarryx_u32(x767, x705, x745, &x769);
+{ uint32_t x772; uint8_t x773 = _addcarryx_u32(x770, x708, x748, &x772);
+{ uint32_t x775; uint8_t x776 = _addcarryx_u32(x773, x711, x751, &x775);
+{ uint32_t x778; uint8_t x779 = _addcarryx_u32(x776, x714, x754, &x778);
+{ uint32_t x781; uint8_t x782 = _addcarryx_u32(x779, x717, x757, &x781);
+{ uint32_t x784; uint8_t x785 = _addcarryx_u32(x782, x720, x760, &x784);
+{ uint32_t x787; uint8_t x788 = _addcarryx_u32(x785, x722, x763, &x787);
+{ uint32_t _; uint32_t x790 = _mulx_u32(x766, 0xffffffff, &_);
+{ uint32_t x794; uint32_t x793 = _mulx_u32(x790, 0xffffffff, &x794);
+{ uint32_t x797; uint32_t x796 = _mulx_u32(x790, 0xffffffff, &x797);
+{ uint32_t x800; uint32_t x799 = _mulx_u32(x790, 0xffffffff, &x800);
+{ uint32_t x803; uint32_t x802 = _mulx_u32(x790, 0xffffffff, &x803);
+{ uint8_t x804 = (0x0 + 0x0);
+{ uint8_t x805 = (0x0 + 0x0);
+{ uint32_t x807; uint8_t x808 = _addcarryx_u32(0x0, x794, x796, &x807);
+{ uint32_t x810; uint8_t x811 = _addcarryx_u32(x808, x797, x799, &x810);
+{ uint32_t x813; uint8_t x814 = _addcarryx_u32(x811, x800, x802, &x813);
+{ uint32_t x816; uint8_t _ = _addcarryx_u32(0x0, x814, x803, &x816);
+{ uint32_t _; uint8_t x820 = _addcarryx_u32(0x0, x766, x790, &_);
+{ uint32_t x822; uint8_t x823 = _addcarryx_u32(x820, x769, x804, &x822);
+{ uint32_t x825; uint8_t x826 = _addcarryx_u32(x823, x772, x805, &x825);
+{ uint32_t x828; uint8_t x829 = _addcarryx_u32(x826, x775, x793, &x828);
+{ uint32_t x831; uint8_t x832 = _addcarryx_u32(x829, x778, x807, &x831);
+{ uint32_t x834; uint8_t x835 = _addcarryx_u32(x832, x781, x810, &x834);
+{ uint32_t x837; uint8_t x838 = _addcarryx_u32(x835, x784, x813, &x837);
+{ uint32_t x840; uint8_t x841 = _addcarryx_u32(x838, x787, x816, &x840);
+{ uint8_t x842 = (x841 + x788);
+{ uint32_t x844; uint8_t x845 = _subborrow_u32(0x0, x822, 0x1, &x844);
+{ uint32_t x847; uint8_t x848 = _subborrow_u32(x845, x825, 0x0, &x847);
+{ uint32_t x850; uint8_t x851 = _subborrow_u32(x848, x828, 0x0, &x850);
+{ uint32_t x853; uint8_t x854 = _subborrow_u32(x851, x831, 0xffffffff, &x853);
+{ uint32_t x856; uint8_t x857 = _subborrow_u32(x854, x834, 0xffffffff, &x856);
+{ uint32_t x859; uint8_t x860 = _subborrow_u32(x857, x837, 0xffffffff, &x859);
+{ uint32_t x862; uint8_t x863 = _subborrow_u32(x860, x840, 0xffffffff, &x862);
+{ uint32_t _; uint8_t x866 = _subborrow_u32(x863, x842, 0x0, &_);
+{ uint32_t x867 = cmovznz(x866, x862, x840);
+{ uint32_t x868 = cmovznz(x866, x859, x837);
+{ uint32_t x869 = cmovznz(x866, x856, x834);
+{ uint32_t x870 = cmovznz(x866, x853, x831);
+{ uint32_t x871 = cmovznz(x866, x850, x828);
+{ uint32_t x872 = cmovznz(x866, x847, x825);
+{ uint32_t x873 = cmovznz(x866, x844, x822);
+out[0] = x867;
+out[1] = x868;
+out[2] = x869;
+out[3] = x870;
+out[4] = x871;
+out[5] = x872;
+out[6] = x873;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery32_2e224m2e96p1/femul.h b/src/Specific/montgomery32_2e224m2e96p1/femul.h
new file mode 100644
index 000000000..ad4e84953
--- /dev/null
+++ b/src/Specific/montgomery32_2e224m2e96p1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Prototype only; the matching definition writes out[0]..out[6], so out needs room for 7 words.
diff --git a/src/Specific/montgomery32_2e224m2e96p1/fenz.c b/src/Specific/montgomery32_2e224m2e96p1/fenz.c
new file mode 100644
index 000000000..a6516fa7a
--- /dev/null
+++ b/src/Specific/montgomery32_2e224m2e96p1/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores the bitwise OR of all seven value arguments in out[0]; the stored value is nonzero exactly when some argument has a nonzero bit among its low 32 bits.
+{ uint32_t x13 = (x12 | x11); // every accumulator is uint32_t, so bits 32..63 of the uint64_t arguments are dropped
+{ uint32_t x14 = (x10 | x13);
+{ uint32_t x15 = (x8 | x14);
+{ uint32_t x16 = (x6 | x15);
+{ uint32_t x17 = (x4 | x16);
+{ uint32_t x18 = (x2 | x17); // x18 = low 32 bits of the OR of all seven arguments
+out[0] = x18; // widened back to uint64_t on the store; out[0] is the only element written
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e224m2e96p1/fenz.h b/src/Specific/montgomery32_2e224m2e96p1/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery32_2e224m2e96p1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by seven uint64_t value arguments; the body is in the fenz.c that includes this header.
diff --git a/src/Specific/montgomery32_2e226m5/feadd.c b/src/Specific/montgomery32_2e226m5/feadd.c
new file mode 100644
index 000000000..f6c20643d
--- /dev/null
+++ b/src/Specific/montgomery32_2e226m5/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds two eight-word operands (x16..x5 and x30..x19) 32 bits at a time, trial-subtracts the constant 2^226 - 5, and writes eight selected words to out[0..7]. Going by the carry chain below, each operand is listed highest word first.
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // lowest words, carry-in 0: x33 is the 32-bit sum, x34 the carry-out
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36); // each later add consumes the previous carry-out
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // highest words; x55 is the carry out of the full 8 x 32-bit addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffffb, &x57); // trial subtraction starts here: the eight constants, lowest word first, spell 2^226 - 5
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60); // borrow is chained the same way the carry was
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3, &x78); // highest constant word
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // folds the addition's carry-out into the borrow chain; the difference word is thrown away and only the final borrow x82 is kept
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared outside this block (presumably liblow.h); presumably returns the 2nd argument when x82 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51); // every select uses the same flag x82, pairing a subtracted word with the matching sum word
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // selected from the highest words (x78 / x54), so out[] is filled highest word first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // selected from the lowest words (x57 / x33)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e226m5/feadd.h b/src/Specific/montgomery32_2e226m5/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e226m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype only: an output pointer followed by sixteen uint64_t value arguments; the body is in the feadd.c that includes this header.
diff --git a/src/Specific/montgomery32_2e226m5/fenz.c b/src/Specific/montgomery32_2e226m5/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e226m5/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores the bitwise OR of all eight value arguments in out[0]; the stored value is nonzero exactly when some argument has a nonzero bit among its low 32 bits.
+{ uint32_t x15 = (x14 | x13); // every accumulator is uint32_t, so bits 32..63 of the uint64_t arguments are dropped
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 = low 32 bits of the OR of all eight arguments
+out[0] = x21; // widened back to uint64_t on the store; out[0] is the only element written
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e226m5/fenz.h b/src/Specific/montgomery32_2e226m5/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e226m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by eight uint64_t value arguments; the body is in the fenz.c that includes this header.
diff --git a/src/Specific/montgomery32_2e230m27/feadd.c b/src/Specific/montgomery32_2e230m27/feadd.c
new file mode 100644
index 000000000..3cbde1b2f
--- /dev/null
+++ b/src/Specific/montgomery32_2e230m27/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds two eight-word operands (x16..x5 and x30..x19) 32 bits at a time, trial-subtracts the constant 2^230 - 27, and writes eight selected words to out[0..7]. Going by the carry chain below, each operand is listed highest word first.
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // lowest words, carry-in 0: x33 is the 32-bit sum, x34 the carry-out
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36); // each later add consumes the previous carry-out
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // highest words; x55 is the carry out of the full 8 x 32-bit addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffe5, &x57); // trial subtraction starts here: the eight constants, lowest word first, spell 2^230 - 27
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60); // borrow is chained the same way the carry was
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3f, &x78); // highest constant word
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // folds the addition's carry-out into the borrow chain; the difference word is thrown away and only the final borrow x82 is kept
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared outside this block (presumably liblow.h); presumably returns the 2nd argument when x82 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51); // every select uses the same flag x82, pairing a subtracted word with the matching sum word
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // selected from the highest words (x78 / x54), so out[] is filled highest word first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // selected from the lowest words (x57 / x33)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e230m27/feadd.h b/src/Specific/montgomery32_2e230m27/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e230m27/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype only: an output pointer followed by sixteen uint64_t value arguments; the body is in the feadd.c that includes this header.
diff --git a/src/Specific/montgomery32_2e230m27/fenz.c b/src/Specific/montgomery32_2e230m27/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e230m27/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores the bitwise OR of all eight value arguments in out[0]; the stored value is nonzero exactly when some argument has a nonzero bit among its low 32 bits.
+{ uint32_t x15 = (x14 | x13); // every accumulator is uint32_t, so bits 32..63 of the uint64_t arguments are dropped
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 = low 32 bits of the OR of all eight arguments
+out[0] = x21; // widened back to uint64_t on the store; out[0] is the only element written
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e230m27/fenz.h b/src/Specific/montgomery32_2e230m27/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e230m27/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by eight uint64_t value arguments; the body is in the fenz.c that includes this header.
diff --git a/src/Specific/montgomery32_2e235m15/feadd.c b/src/Specific/montgomery32_2e235m15/feadd.c
new file mode 100644
index 000000000..892772269
--- /dev/null
+++ b/src/Specific/montgomery32_2e235m15/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds two eight-word operands (x16..x5 and x30..x19) 32 bits at a time, trial-subtracts the constant 2^235 - 15, and writes eight selected words to out[0..7]. Going by the carry chain below, each operand is listed highest word first.
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // lowest words, carry-in 0: x33 is the 32-bit sum, x34 the carry-out
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36); // each later add consumes the previous carry-out
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // highest words; x55 is the carry out of the full 8 x 32-bit addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff1, &x57); // trial subtraction starts here: the eight constants, lowest word first, spell 2^235 - 15
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60); // borrow is chained the same way the carry was
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ff, &x78); // highest constant word
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // folds the addition's carry-out into the borrow chain; the difference word is thrown away and only the final borrow x82 is kept
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared outside this block (presumably liblow.h); presumably returns the 2nd argument when x82 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51); // every select uses the same flag x82, pairing a subtracted word with the matching sum word
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // selected from the highest words (x78 / x54), so out[] is filled highest word first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // selected from the lowest words (x57 / x33)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e235m15/feadd.h b/src/Specific/montgomery32_2e235m15/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e235m15/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype only: an output pointer followed by sixteen uint64_t value arguments; the body is in the feadd.c that includes this header.
diff --git a/src/Specific/montgomery32_2e235m15/fenz.c b/src/Specific/montgomery32_2e235m15/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e235m15/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores the bitwise OR of all eight value arguments in out[0]; the stored value is nonzero exactly when some argument has a nonzero bit among its low 32 bits.
+{ uint32_t x15 = (x14 | x13); // every accumulator is uint32_t, so bits 32..63 of the uint64_t arguments are dropped
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 = low 32 bits of the OR of all eight arguments
+out[0] = x21; // widened back to uint64_t on the store; out[0] is the only element written
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e235m15/fenz.h b/src/Specific/montgomery32_2e235m15/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e235m15/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by eight uint64_t value arguments; the body is in the fenz.c that includes this header.
diff --git a/src/Specific/montgomery32_2e243m9/feadd.c b/src/Specific/montgomery32_2e243m9/feadd.c
new file mode 100644
index 000000000..e09d0a8d4
--- /dev/null
+++ b/src/Specific/montgomery32_2e243m9/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds two eight-word operands (x16..x5 and x30..x19) 32 bits at a time, trial-subtracts the constant 2^243 - 9, and writes eight selected words to out[0..7]. Going by the carry chain below, each operand is listed highest word first.
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // lowest words, carry-in 0: x33 is the 32-bit sum, x34 the carry-out
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36); // each later add consumes the previous carry-out
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // highest words; x55 is the carry out of the full 8 x 32-bit addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff7, &x57); // trial subtraction starts here: the eight constants, lowest word first, spell 2^243 - 9
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60); // borrow is chained the same way the carry was
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ffff, &x78); // highest constant word
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // folds the addition's carry-out into the borrow chain; the difference word is thrown away and only the final borrow x82 is kept
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared outside this block (presumably liblow.h); presumably returns the 2nd argument when x82 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51); // every select uses the same flag x82, pairing a subtracted word with the matching sum word
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // selected from the highest words (x78 / x54), so out[] is filled highest word first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // selected from the lowest words (x57 / x33)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e243m9/feadd.h b/src/Specific/montgomery32_2e243m9/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e243m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype only: an output pointer followed by sixteen uint64_t value arguments; the body is in the feadd.c that includes this header.
diff --git a/src/Specific/montgomery32_2e243m9/fenz.c b/src/Specific/montgomery32_2e243m9/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e243m9/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores the bitwise OR of all eight value arguments in out[0]; the stored value is nonzero exactly when some argument has a nonzero bit among its low 32 bits.
+{ uint32_t x15 = (x14 | x13); // every accumulator is uint32_t, so bits 32..63 of the uint64_t arguments are dropped
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 = low 32 bits of the OR of all eight arguments
+out[0] = x21; // widened back to uint64_t on the store; out[0] is the only element written
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e243m9/fenz.h b/src/Specific/montgomery32_2e243m9/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e243m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by eight uint64_t value arguments; the body is in the fenz.c that includes this header.
diff --git a/src/Specific/montgomery32_2e251m9/feadd.c b/src/Specific/montgomery32_2e251m9/feadd.c
new file mode 100644
index 000000000..808a39e88
--- /dev/null
+++ b/src/Specific/montgomery32_2e251m9/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds two eight-word operands (x16..x5 and x30..x19) 32 bits at a time, trial-subtracts the constant 2^251 - 9, and writes eight selected words to out[0..7]. Going by the carry chain below, each operand is listed highest word first.
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // lowest words, carry-in 0: x33 is the 32-bit sum, x34 the carry-out
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36); // each later add consumes the previous carry-out
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // highest words; x55 is the carry out of the full 8 x 32-bit addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffff7, &x57); // trial subtraction starts here: the eight constants, lowest word first, spell 2^251 - 9
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60); // borrow is chained the same way the carry was
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7ffffff, &x78); // highest constant word
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // folds the addition's carry-out into the borrow chain; the difference word is thrown away and only the final borrow x82 is kept
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared outside this block (presumably liblow.h); presumably returns the 2nd argument when x82 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51); // every select uses the same flag x82, pairing a subtracted word with the matching sum word
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // selected from the highest words (x78 / x54), so out[] is filled highest word first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // selected from the lowest words (x57 / x33)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e251m9/feadd.h b/src/Specific/montgomery32_2e251m9/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e251m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype only: an output pointer followed by sixteen uint64_t value arguments; the body is in the feadd.c that includes this header.
diff --git a/src/Specific/montgomery32_2e251m9/fenz.c b/src/Specific/montgomery32_2e251m9/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e251m9/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores the bitwise OR of all eight value arguments in out[0]; the stored value is nonzero exactly when some argument has a nonzero bit among its low 32 bits.
+{ uint32_t x15 = (x14 | x13); // every accumulator is uint32_t, so bits 32..63 of the uint64_t arguments are dropped
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 = low 32 bits of the OR of all eight arguments
+out[0] = x21; // widened back to uint64_t on the store; out[0] is the only element written
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e251m9/fenz.h b/src/Specific/montgomery32_2e251m9/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e251m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by eight uint64_t value arguments; the body is in the fenz.c that includes this header.
diff --git a/src/Specific/montgomery32_2e254m127x2e240m1/feadd.c b/src/Specific/montgomery32_2e254m127x2e240m1/feadd.c
new file mode 100644
index 000000000..bb1f6172f
--- /dev/null
+++ b/src/Specific/montgomery32_2e254m127x2e240m1/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds two eight-word operands (x16..x5 and x30..x19) 32 bits at a time, trial-subtracts the constant 2^254 - 127*2^240 - 1, and writes eight selected words to out[0..7]. Going by the carry chain below, each operand is listed highest word first.
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // lowest words, carry-in 0: x33 is the 32-bit sum, x34 the carry-out
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36); // each later add consumes the previous carry-out
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // highest words; x55 is the carry out of the full 8 x 32-bit addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57); // trial subtraction starts here: the eight constants, lowest word first, spell 2^254 - 127*2^240 - 1
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60); // borrow is chained the same way the carry was
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x3f80ffff, &x78); // highest constant word: 0x3f80ffff = (2^14 - 127) * 2^16 - 1
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // folds the addition's carry-out into the borrow chain; the difference word is thrown away and only the final borrow x82 is kept
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared outside this block (presumably liblow.h); presumably returns the 2nd argument when x82 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51); // every select uses the same flag x82, pairing a subtracted word with the matching sum word
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // selected from the highest words (x78 / x54), so out[] is filled highest word first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // selected from the lowest words (x57 / x33)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e254m127x2e240m1/feadd.h b/src/Specific/montgomery32_2e254m127x2e240m1/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e254m127x2e240m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype only: an output pointer followed by sixteen uint64_t value arguments; the body is in the feadd.c that includes this header.
diff --git a/src/Specific/montgomery32_2e254m127x2e240m1/fenz.c b/src/Specific/montgomery32_2e254m127x2e240m1/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e254m127x2e240m1/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores the bitwise OR of all eight value arguments in out[0]; the stored value is nonzero exactly when some argument has a nonzero bit among its low 32 bits.
+{ uint32_t x15 = (x14 | x13); // every accumulator is uint32_t, so bits 32..63 of the uint64_t arguments are dropped
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 = low 32 bits of the OR of all eight arguments
+out[0] = x21; // widened back to uint64_t on the store; out[0] is the only element written
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e254m127x2e240m1/fenz.h b/src/Specific/montgomery32_2e254m127x2e240m1/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e254m127x2e240m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by eight uint64_t value arguments; the body is in the fenz.c that includes this header.
diff --git a/src/Specific/montgomery32_2e255m19/feadd.c b/src/Specific/montgomery32_2e255m19/feadd.c
new file mode 100644
index 000000000..e106e3cdc
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m19/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // adds the 8-word values (x5,x7,x9,x11,x13,x15,x17,x16) and (x19,x21,x23,x25,x27,x29,x31,x30), listed least-significant word first, then selects per word between the sum and (sum - constant) via cmovznz
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // start of the 8-step add-with-carry chain (no carry in); the uint64_t arguments are handed to a 32-bit intrinsic
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // most-significant word of the sum; x55 is the carry out of the whole addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffed, &x57); // start of the borrow chain: subtracts the constant with words 0xffffffed, 0xffffffff x6, 0x7fffffff (low to high), i.e. 2^255 - 19
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // ninth step: runs the borrow through the addition's carry-out x55; the difference word (_) is discarded, only the final borrow x82 is used
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument (subtracted word) when x82 == 0 and the 3rd (plain sum word) otherwise - TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51);
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // output is stored most-significant word first: out[0] comes from the last chain step, out[7] from the first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e255m19/feadd.h b/src/Specific/montgomery32_2e255m19/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // prototype for the definition in feadd.c; caller supplies uint64_t out[8]
diff --git a/src/Specific/montgomery32_2e255m19/fenz.c b/src/Specific/montgomery32_2e255m19/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m19/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs all eight inputs together and stores the result in out[0]
+{ uint32_t x15 = (x14 | x13); // every step narrows to uint32_t, so only the low 32 bits of each input contribute
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 is nonzero iff some input has a bit set in its low 32 bits
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1]; (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e255m19/fenz.h b/src/Specific/montgomery32_2e255m19/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for the definition in fenz.c; caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c b/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c
new file mode 100644
index 000000000..e106e3cdc
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // adds the 8-word values (x5,x7,x9,x11,x13,x15,x17,x16) and (x19,x21,x23,x25,x27,x29,x31,x30), listed least-significant word first, then selects per word between the sum and (sum - constant) via cmovznz
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // start of the 8-step add-with-carry chain (no carry in); the uint64_t arguments are handed to a 32-bit intrinsic
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // most-significant word of the sum; x55 is the carry out of the whole addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffed, &x57); // start of the borrow chain: subtracts the constant with words 0xffffffed, 0xffffffff x6, 0x7fffffff (low to high), i.e. 2^255 - 2^4 - 2^1 - 1 = 2^255 - 19
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // ninth step: runs the borrow through the addition's carry-out x55; the difference word (_) is discarded, only the final borrow x82 is used
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument (subtracted word) when x82 == 0 and the 3rd (plain sum word) otherwise - TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51);
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // output is stored most-significant word first: out[0] comes from the last chain step, out[7] from the first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.h b/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m2e4m2e1m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // prototype for the definition in feadd.c; caller supplies uint64_t out[8]
diff --git a/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c b/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs all eight inputs together and stores the result in out[0]
+{ uint32_t x15 = (x14 | x13); // every step narrows to uint32_t, so only the low 32 bits of each input contribute
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 is nonzero iff some input has a bit set in its low 32 bits
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1]; (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.h b/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m2e4m2e1m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for the definition in fenz.c; caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery32_2e255m765/feadd.c b/src/Specific/montgomery32_2e255m765/feadd.c
new file mode 100644
index 000000000..d2574eb59
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m765/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // adds the 8-word values (x5,x7,x9,x11,x13,x15,x17,x16) and (x19,x21,x23,x25,x27,x29,x31,x30), listed least-significant word first, then selects per word between the sum and (sum - constant) via cmovznz
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // start of the 8-step add-with-carry chain (no carry in); the uint64_t arguments are handed to a 32-bit intrinsic
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // most-significant word of the sum; x55 is the carry out of the whole addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffd03, &x57); // start of the borrow chain: subtracts the constant with words 0xfffffd03, 0xffffffff x6, 0x7fffffff (low to high), i.e. 2^255 - 765
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0x7fffffff, &x78);
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // ninth step: runs the borrow through the addition's carry-out x55; the difference word (_) is discarded, only the final borrow x82 is used
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument (subtracted word) when x82 == 0 and the 3rd (plain sum word) otherwise - TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51);
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // output is stored most-significant word first: out[0] comes from the last chain step, out[7] from the first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e255m765/feadd.h b/src/Specific/montgomery32_2e255m765/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m765/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // prototype for the definition in feadd.c; caller supplies uint64_t out[8]
diff --git a/src/Specific/montgomery32_2e255m765/fenz.c b/src/Specific/montgomery32_2e255m765/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m765/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs all eight inputs together and stores the result in out[0]
+{ uint32_t x15 = (x14 | x13); // every step narrows to uint32_t, so only the low 32 bits of each input contribute
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 is nonzero iff some input has a bit set in its low 32 bits
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1]; (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e255m765/fenz.h b/src/Specific/montgomery32_2e255m765/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e255m765/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for the definition in fenz.c; caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery32_2e256m189/feadd.c b/src/Specific/montgomery32_2e256m189/feadd.c
new file mode 100644
index 000000000..552b1ff17
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m189/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // adds the 8-word values (x5,x7,x9,x11,x13,x15,x17,x16) and (x19,x21,x23,x25,x27,x29,x31,x30), listed least-significant word first, then selects per word between the sum and (sum - constant) via cmovznz
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // start of the 8-step add-with-carry chain (no carry in); the uint64_t arguments are handed to a 32-bit intrinsic
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // most-significant word of the sum; x55 is the carry out of the whole addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffff43, &x57); // start of the borrow chain: subtracts the constant with words 0xffffff43, 0xffffffff x7 (low to high), i.e. 2^256 - 189
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // ninth step: runs the borrow through the addition's carry-out x55; the difference word (_) is discarded, only the final borrow x82 is used
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument (subtracted word) when x82 == 0 and the 3rd (plain sum word) otherwise - TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51);
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // output is stored most-significant word first: out[0] comes from the last chain step, out[7] from the first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e256m189/feadd.h b/src/Specific/montgomery32_2e256m189/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m189/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // prototype for the definition in feadd.c; caller supplies uint64_t out[8]
diff --git a/src/Specific/montgomery32_2e256m189/fenz.c b/src/Specific/montgomery32_2e256m189/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m189/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs all eight inputs together and stores the result in out[0]
+{ uint32_t x15 = (x14 | x13); // every step narrows to uint32_t, so only the low 32 bits of each input contribute
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 is nonzero iff some input has a bit set in its low 32 bits
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1]; (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e256m189/fenz.h b/src/Specific/montgomery32_2e256m189/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m189/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for the definition in fenz.c; caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c
new file mode 100644
index 000000000..30a7671cb
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // adds the 8-word values (x5,x7,x9,x11,x13,x15,x17,x16) and (x19,x21,x23,x25,x27,x29,x31,x30), listed least-significant word first, then selects per word between the sum and (sum - constant) via cmovznz
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // start of the 8-step add-with-carry chain (no carry in); the uint64_t arguments are handed to a 32-bit intrinsic
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // most-significant word of the sum; x55 is the carry out of the whole addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57); // start of the borrow chain: subtracts the constant with words 0xffffffff x3, 0x0 x3, 0x1, 0xffffffff (low to high), i.e. 2^256 - 2^224 + 2^192 + 2^96 - 1
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0x0, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0x0, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0x0, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0x1, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // ninth step: runs the borrow through the addition's carry-out x55; the difference word (_) is discarded, only the final borrow x82 is used
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument (subtracted word) when x82 == 0 and the 3rd (plain sum word) otherwise - TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51);
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // output is stored most-significant word first: out[0] comes from the last chain step, out[7] from the first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.h b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // prototype for the definition in feadd.c; caller supplies uint64_t out[8]
diff --git a/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs all eight inputs together and stores the result in out[0]
+{ uint32_t x15 = (x14 | x13); // every step narrows to uint32_t, so only the low 32 bits of each input contribute
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 is nonzero iff some input has a bit set in its low 32 bits
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1]; (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.h b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m2e224p2e192p2e96m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for the definition in fenz.c; caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery32_2e256m2e32m977/feadd.c b/src/Specific/montgomery32_2e256m2e32m977/feadd.c
new file mode 100644
index 000000000..5741bb5c6
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m2e32m977/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // adds the 8-word values (x5,x7,x9,x11,x13,x15,x17,x16) and (x19,x21,x23,x25,x27,x29,x31,x30), listed least-significant word first, then selects per word between the sum and (sum - constant) via cmovznz
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // start of the 8-step add-with-carry chain (no carry in); the uint64_t arguments are handed to a 32-bit intrinsic
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // most-significant word of the sum; x55 is the carry out of the whole addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xfffffc2f, &x57); // start of the borrow chain: subtracts the constant with words 0xfffffc2f, 0xfffffffe, 0xffffffff x6 (low to high), i.e. 2^256 - 2^32 - 977
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xfffffffe, &x60);
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffffffff, &x78);
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // ninth step: runs the borrow through the addition's carry-out x55; the difference word (_) is discarded, only the final borrow x82 is used
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument (subtracted word) when x82 == 0 and the 3rd (plain sum word) otherwise - TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51);
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // output is stored most-significant word first: out[0] comes from the last chain step, out[7] from the first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e256m2e32m977/feadd.h b/src/Specific/montgomery32_2e256m2e32m977/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m2e32m977/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // prototype for the definition in feadd.c; caller supplies uint64_t out[8]
diff --git a/src/Specific/montgomery32_2e256m2e32m977/fenz.c b/src/Specific/montgomery32_2e256m2e32m977/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m2e32m977/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs all eight inputs together and stores the result in out[0]
+{ uint32_t x15 = (x14 | x13); // every step narrows to uint32_t, so only the low 32 bits of each input contribute
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20); // x21 is nonzero iff some input has a bit set in its low 32 bits
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1]; (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e256m2e32m977/fenz.h b/src/Specific/montgomery32_2e256m2e32m977/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m2e32m977/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // redefined unconditionally so the prototype below always carries the attribute
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for the definition in fenz.c; caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery32_2e256m88x2e240m1/feadd.c b/src/Specific/montgomery32_2e256m88x2e240m1/feadd.c
new file mode 100644
index 000000000..c1853e8e8
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m88x2e240m1/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer type (GCC mode TI); not referenced by the function in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (Clang and ICC excluded): use the sbb builtins under the intrinsic names; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // replaces whatever definition was in effect (the included header defines the same macro)
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // adds the 8-word values (x5,x7,x9,x11,x13,x15,x17,x16) and (x19,x21,x23,x25,x27,x29,x31,x30), listed least-significant word first, then selects per word between the sum and (sum - constant) via cmovznz
+{ uint32_t x33; uint8_t x34 = _addcarryx_u32(0x0, x5, x19, &x33); // start of the 8-step add-with-carry chain (no carry in); the uint64_t arguments are handed to a 32-bit intrinsic
+{ uint32_t x36; uint8_t x37 = _addcarryx_u32(x34, x7, x21, &x36);
+{ uint32_t x39; uint8_t x40 = _addcarryx_u32(x37, x9, x23, &x39);
+{ uint32_t x42; uint8_t x43 = _addcarryx_u32(x40, x11, x25, &x42);
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(x43, x13, x27, &x45);
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x15, x29, &x48);
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x17, x31, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x16, x30, &x54); // most-significant word of the sum; x55 is the carry out of the whole addition
+{ uint32_t x57; uint8_t x58 = _subborrow_u32(0x0, x33, 0xffffffff, &x57); // start of the borrow chain: subtracts the constant with words 0xffffffff x7, 0xffa7ffff (low to high), i.e. 2^256 - 88*2^240 - 1
+{ uint32_t x60; uint8_t x61 = _subborrow_u32(x58, x36, 0xffffffff, &x60);
+{ uint32_t x63; uint8_t x64 = _subborrow_u32(x61, x39, 0xffffffff, &x63);
+{ uint32_t x66; uint8_t x67 = _subborrow_u32(x64, x42, 0xffffffff, &x66);
+{ uint32_t x69; uint8_t x70 = _subborrow_u32(x67, x45, 0xffffffff, &x69);
+{ uint32_t x72; uint8_t x73 = _subborrow_u32(x70, x48, 0xffffffff, &x72);
+{ uint32_t x75; uint8_t x76 = _subborrow_u32(x73, x51, 0xffffffff, &x75);
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(x76, x54, 0xffa7ffff, &x78);
+{ uint32_t _; uint8_t x82 = _subborrow_u32(x79, x55, 0x0, &_); // ninth step: runs the borrow through the addition's carry-out x55; the difference word (_) is discarded, only the final borrow x82 is used
+{ uint32_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument (subtracted word) when x82 == 0 and the 3rd (plain sum word) otherwise - TODO confirm
+{ uint32_t x84 = cmovznz(x82, x75, x51);
+{ uint32_t x85 = cmovznz(x82, x72, x48);
+{ uint32_t x86 = cmovznz(x82, x69, x45);
+{ uint32_t x87 = cmovznz(x82, x66, x42);
+{ uint32_t x88 = cmovznz(x82, x63, x39);
+{ uint32_t x89 = cmovznz(x82, x60, x36);
+{ uint32_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // output is stored most-significant word first: out[0] comes from the last chain step, out[7] from the first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery32_2e256m88x2e240m1/feadd.h b/src/Specific/montgomery32_2e256m88x2e240m1/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m88x2e240m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/montgomery32_2e256m88x2e240m1/fenz.c b/src/Specific/montgomery32_2e256m88x2e240m1/fenz.c
new file mode 100644
index 000000000..d35bfdd14
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m88x2e240m1/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: bitwise-ORs all eight inputs together and stores the 32-bit result in out[0].
+{ uint32_t x15 = (x14 | x13); // the OR is evaluated on uint64_t operands and then narrowed to uint32_t by the declaration
+{ uint32_t x16 = (x12 | x15);
+{ uint32_t x17 = (x10 | x16);
+{ uint32_t x18 = (x8 | x17);
+{ uint32_t x19 = (x6 | x18);
+{ uint32_t x20 = (x4 | x19);
+{ uint32_t x21 = (x2 | x20);
+out[0] = x21; // zero exactly when the low 32 bits of every input are zero; any bits above bit 31 of an input are dropped by the narrowing
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e256m88x2e240m1/fenz.h b/src/Specific/montgomery32_2e256m88x2e240m1/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery32_2e256m88x2e240m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e266m3/feadd.c b/src/Specific/montgomery32_2e266m3/feadd.c
new file mode 100644
index 000000000..4ec364754
--- /dev/null
+++ b/src/Specific/montgomery32_2e266m3/feadd.c
@@ -0,0 +1,58 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21) // feadd: 9-limb add with carry of (x18,x19,x17,...,x5) and (x34,x35,x33,...,x21), then a 9-limb subtract of a fixed constant, then a per-limb select between sum and difference; writes out[0..8].
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(0x0, x5, x21, &x37); // carry chain starts here with carry-in 0; x5/x21 are the lowest limbs
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x7, x23, &x40); // NOTE(review): parameters are uint64_t but feed a 32-bit intrinsic; presumably each holds one 32-bit limb -- confirm with caller
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(x41, x9, x25, &x43);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x11, x27, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x13, x29, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x15, x31, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x17, x33, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x19, x35, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x18, x34, &x61); // x62 is the carry out of the highest limb add
+{ uint32_t x64; uint8_t x65 = _subborrow_u32(0x0, x37, 0xfffffffd, &x64); // borrow chain starts with borrow-in 0; the constant limbs, low limb first, spell 2^266 - 3 (presumably the field modulus)
+{ uint32_t x67; uint8_t x68 = _subborrow_u32(x65, x40, 0xffffffff, &x67);
+{ uint32_t x70; uint8_t x71 = _subborrow_u32(x68, x43, 0xffffffff, &x70);
+{ uint32_t x73; uint8_t x74 = _subborrow_u32(x71, x46, 0xffffffff, &x73);
+{ uint32_t x76; uint8_t x77 = _subborrow_u32(x74, x49, 0xffffffff, &x76);
+{ uint32_t x79; uint8_t x80 = _subborrow_u32(x77, x52, 0xffffffff, &x79);
+{ uint32_t x82; uint8_t x83 = _subborrow_u32(x80, x55, 0xffffffff, &x82);
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(x83, x58, 0xffffffff, &x85);
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x61, 0x3ff, &x88);
+{ uint32_t _; uint8_t x92 = _subborrow_u32(x89, x62, 0x0, &_); // folds the add carry x62 into the borrow chain; only the final borrow x92 is used, the difference written to _ is ignored
+{ uint32_t x93 = cmovznz(x92, x88, x61); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x92 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x94 = cmovznz(x92, x85, x58);
+{ uint32_t x95 = cmovznz(x92, x82, x55);
+{ uint32_t x96 = cmovznz(x92, x79, x52);
+{ uint32_t x97 = cmovznz(x92, x76, x49);
+{ uint32_t x98 = cmovznz(x92, x73, x46);
+{ uint32_t x99 = cmovznz(x92, x70, x43);
+{ uint32_t x100 = cmovznz(x92, x67, x40);
+{ uint32_t x101 = cmovznz(x92, x64, x37);
+out[0] = x93; // out[0] receives the highest limb (selected from x88/x61), matching the highest-limb-first parameter order
+out[1] = x94;
+out[2] = x95;
+out[3] = x96;
+out[4] = x97;
+out[5] = x98;
+out[6] = x99;
+out[7] = x100;
+out[8] = x101; // out[8] receives the lowest limb (selected from x64/x37)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/montgomery32_2e266m3/feadd.h b/src/Specific/montgomery32_2e266m3/feadd.h
new file mode 100644
index 000000000..a1ada01c4
--- /dev/null
+++ b/src/Specific/montgomery32_2e266m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21);
diff --git a/src/Specific/montgomery32_2e266m3/fenz.c b/src/Specific/montgomery32_2e266m3/fenz.c
new file mode 100644
index 000000000..2566ebcbc
--- /dev/null
+++ b/src/Specific/montgomery32_2e266m3/fenz.c
@@ -0,0 +1,30 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: bitwise-ORs all nine inputs together and stores the 32-bit result in out[0].
+{ uint32_t x17 = (x16 | x15); // the OR is evaluated on uint64_t operands and then narrowed to uint32_t by the declaration
+{ uint32_t x18 = (x14 | x17);
+{ uint32_t x19 = (x12 | x18);
+{ uint32_t x20 = (x10 | x19);
+{ uint32_t x21 = (x8 | x20);
+{ uint32_t x22 = (x6 | x21);
+{ uint32_t x23 = (x4 | x22);
+{ uint32_t x24 = (x2 | x23);
+out[0] = x24; // zero exactly when the low 32 bits of every input are zero; any bits above bit 31 of an input are dropped by the narrowing
+}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e266m3/fenz.h b/src/Specific/montgomery32_2e266m3/fenz.h
new file mode 100644
index 000000000..47fa749c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e266m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e285m9/feadd.c b/src/Specific/montgomery32_2e285m9/feadd.c
new file mode 100644
index 000000000..ce65d9fa5
--- /dev/null
+++ b/src/Specific/montgomery32_2e285m9/feadd.c
@@ -0,0 +1,58 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21) // feadd: 9-limb add with carry of (x18,x19,x17,...,x5) and (x34,x35,x33,...,x21), then a 9-limb subtract of a fixed constant, then a per-limb select between sum and difference; writes out[0..8].
+{ uint32_t x37; uint8_t x38 = _addcarryx_u32(0x0, x5, x21, &x37); // carry chain starts here with carry-in 0; x5/x21 are the lowest limbs
+{ uint32_t x40; uint8_t x41 = _addcarryx_u32(x38, x7, x23, &x40); // NOTE(review): parameters are uint64_t but feed a 32-bit intrinsic; presumably each holds one 32-bit limb -- confirm with caller
+{ uint32_t x43; uint8_t x44 = _addcarryx_u32(x41, x9, x25, &x43);
+{ uint32_t x46; uint8_t x47 = _addcarryx_u32(x44, x11, x27, &x46);
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(x47, x13, x29, &x49);
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x15, x31, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x17, x33, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x19, x35, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x18, x34, &x61); // x62 is the carry out of the highest limb add
+{ uint32_t x64; uint8_t x65 = _subborrow_u32(0x0, x37, 0xfffffff7, &x64); // borrow chain starts with borrow-in 0; the constant limbs, low limb first, spell 2^285 - 9 (presumably the field modulus)
+{ uint32_t x67; uint8_t x68 = _subborrow_u32(x65, x40, 0xffffffff, &x67);
+{ uint32_t x70; uint8_t x71 = _subborrow_u32(x68, x43, 0xffffffff, &x70);
+{ uint32_t x73; uint8_t x74 = _subborrow_u32(x71, x46, 0xffffffff, &x73);
+{ uint32_t x76; uint8_t x77 = _subborrow_u32(x74, x49, 0xffffffff, &x76);
+{ uint32_t x79; uint8_t x80 = _subborrow_u32(x77, x52, 0xffffffff, &x79);
+{ uint32_t x82; uint8_t x83 = _subborrow_u32(x80, x55, 0xffffffff, &x82);
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(x83, x58, 0xffffffff, &x85);
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x61, 0x1fffffff, &x88);
+{ uint32_t _; uint8_t x92 = _subborrow_u32(x89, x62, 0x0, &_); // folds the add carry x62 into the borrow chain; only the final borrow x92 is used, the difference written to _ is ignored
+{ uint32_t x93 = cmovznz(x92, x88, x61); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x92 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x94 = cmovznz(x92, x85, x58);
+{ uint32_t x95 = cmovznz(x92, x82, x55);
+{ uint32_t x96 = cmovznz(x92, x79, x52);
+{ uint32_t x97 = cmovznz(x92, x76, x49);
+{ uint32_t x98 = cmovznz(x92, x73, x46);
+{ uint32_t x99 = cmovznz(x92, x70, x43);
+{ uint32_t x100 = cmovznz(x92, x67, x40);
+{ uint32_t x101 = cmovznz(x92, x64, x37);
+out[0] = x93; // out[0] receives the highest limb (selected from x88/x61), matching the highest-limb-first parameter order
+out[1] = x94;
+out[2] = x95;
+out[3] = x96;
+out[4] = x97;
+out[5] = x98;
+out[6] = x99;
+out[7] = x100;
+out[8] = x101; // out[8] receives the lowest limb (selected from x64/x37)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/montgomery32_2e285m9/feadd.h b/src/Specific/montgomery32_2e285m9/feadd.h
new file mode 100644
index 000000000..a1ada01c4
--- /dev/null
+++ b/src/Specific/montgomery32_2e285m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21);
diff --git a/src/Specific/montgomery32_2e285m9/fenz.c b/src/Specific/montgomery32_2e285m9/fenz.c
new file mode 100644
index 000000000..2566ebcbc
--- /dev/null
+++ b/src/Specific/montgomery32_2e285m9/fenz.c
@@ -0,0 +1,30 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: bitwise-ORs all nine inputs together and stores the 32-bit result in out[0].
+{ uint32_t x17 = (x16 | x15); // the OR is evaluated on uint64_t operands and then narrowed to uint32_t by the declaration
+{ uint32_t x18 = (x14 | x17);
+{ uint32_t x19 = (x12 | x18);
+{ uint32_t x20 = (x10 | x19);
+{ uint32_t x21 = (x8 | x20);
+{ uint32_t x22 = (x6 | x21);
+{ uint32_t x23 = (x4 | x22);
+{ uint32_t x24 = (x2 | x23);
+out[0] = x24; // zero exactly when the low 32 bits of every input are zero; any bits above bit 31 of an input are dropped by the narrowing
+}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e285m9/fenz.h b/src/Specific/montgomery32_2e285m9/fenz.h
new file mode 100644
index 000000000..47fa749c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e285m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e291m19/feadd.c b/src/Specific/montgomery32_2e291m19/feadd.c
new file mode 100644
index 000000000..a589c0281
--- /dev/null
+++ b/src/Specific/montgomery32_2e291m19/feadd.c
@@ -0,0 +1,62 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23) // feadd: 10-limb add with carry of (x20,x21,x19,...,x5) and (x38,x39,x37,...,x23), then a 10-limb subtract of a fixed constant, then a per-limb select between sum and difference; writes out[0..9].
+{ uint32_t x41; uint8_t x42 = _addcarryx_u32(0x0, x5, x23, &x41); // carry chain starts here with carry-in 0; x5/x23 are the lowest limbs
+{ uint32_t x44; uint8_t x45 = _addcarryx_u32(x42, x7, x25, &x44); // NOTE(review): parameters are uint64_t but feed a 32-bit intrinsic; presumably each holds one 32-bit limb -- confirm with caller
+{ uint32_t x47; uint8_t x48 = _addcarryx_u32(x45, x9, x27, &x47);
+{ uint32_t x50; uint8_t x51 = _addcarryx_u32(x48, x11, x29, &x50);
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(x51, x13, x31, &x53);
+{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x15, x33, &x56);
+{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x17, x35, &x59);
+{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x19, x37, &x62);
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x21, x39, &x65);
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x20, x38, &x68); // x69 is the carry out of the highest limb add
+{ uint32_t x71; uint8_t x72 = _subborrow_u32(0x0, x41, 0xffffffed, &x71); // borrow chain starts with borrow-in 0; the constant limbs, low limb first, spell 2^291 - 19 (presumably the field modulus)
+{ uint32_t x74; uint8_t x75 = _subborrow_u32(x72, x44, 0xffffffff, &x74);
+{ uint32_t x77; uint8_t x78 = _subborrow_u32(x75, x47, 0xffffffff, &x77);
+{ uint32_t x80; uint8_t x81 = _subborrow_u32(x78, x50, 0xffffffff, &x80);
+{ uint32_t x83; uint8_t x84 = _subborrow_u32(x81, x53, 0xffffffff, &x83);
+{ uint32_t x86; uint8_t x87 = _subborrow_u32(x84, x56, 0xffffffff, &x86);
+{ uint32_t x89; uint8_t x90 = _subborrow_u32(x87, x59, 0xffffffff, &x89);
+{ uint32_t x92; uint8_t x93 = _subborrow_u32(x90, x62, 0xffffffff, &x92);
+{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x65, 0xffffffff, &x95);
+{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x68, 0x7, &x98);
+{ uint32_t _; uint8_t x102 = _subborrow_u32(x99, x69, 0x0, &_); // folds the add carry x69 into the borrow chain; only the final borrow x102 is used, the difference written to _ is ignored
+{ uint32_t x103 = cmovznz(x102, x98, x68); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x102 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x104 = cmovznz(x102, x95, x65);
+{ uint32_t x105 = cmovznz(x102, x92, x62);
+{ uint32_t x106 = cmovznz(x102, x89, x59);
+{ uint32_t x107 = cmovznz(x102, x86, x56);
+{ uint32_t x108 = cmovznz(x102, x83, x53);
+{ uint32_t x109 = cmovznz(x102, x80, x50);
+{ uint32_t x110 = cmovznz(x102, x77, x47);
+{ uint32_t x111 = cmovznz(x102, x74, x44);
+{ uint32_t x112 = cmovznz(x102, x71, x41);
+out[0] = x103; // out[0] receives the highest limb (selected from x98/x68), matching the highest-limb-first parameter order
+out[1] = x104;
+out[2] = x105;
+out[3] = x106;
+out[4] = x107;
+out[5] = x108;
+out[6] = x109;
+out[7] = x110;
+out[8] = x111;
+out[9] = x112; // out[9] receives the lowest limb (selected from x71/x41)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/montgomery32_2e291m19/feadd.h b/src/Specific/montgomery32_2e291m19/feadd.h
new file mode 100644
index 000000000..a35fcb862
--- /dev/null
+++ b/src/Specific/montgomery32_2e291m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23);
diff --git a/src/Specific/montgomery32_2e291m19/fenz.c b/src/Specific/montgomery32_2e291m19/fenz.c
new file mode 100644
index 000000000..cca29049c
--- /dev/null
+++ b/src/Specific/montgomery32_2e291m19/fenz.c
@@ -0,0 +1,31 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: bitwise-ORs all ten inputs together and stores the 32-bit result in out[0].
+{ uint32_t x19 = (x18 | x17); // the OR is evaluated on uint64_t operands and then narrowed to uint32_t by the declaration
+{ uint32_t x20 = (x16 | x19);
+{ uint32_t x21 = (x14 | x20);
+{ uint32_t x22 = (x12 | x21);
+{ uint32_t x23 = (x10 | x22);
+{ uint32_t x24 = (x8 | x23);
+{ uint32_t x25 = (x6 | x24);
+{ uint32_t x26 = (x4 | x25);
+{ uint32_t x27 = (x2 | x26);
+out[0] = x27; // zero exactly when the low 32 bits of every input are zero; any bits above bit 31 of an input are dropped by the narrowing
+}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e291m19/fenz.h b/src/Specific/montgomery32_2e291m19/fenz.h
new file mode 100644
index 000000000..5fb49c835
--- /dev/null
+++ b/src/Specific/montgomery32_2e291m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e321m9/feadd.c b/src/Specific/montgomery32_2e321m9/feadd.c
new file mode 100644
index 000000000..665eaaa05
--- /dev/null
+++ b/src/Specific/montgomery32_2e321m9/feadd.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25) // feadd: 11-limb add with carry of (x22,x23,x21,...,x5) and (x42,x43,x41,...,x25), then an 11-limb subtract of a fixed constant, then a per-limb select between sum and difference; writes out[0..10].
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45); // carry chain starts here with carry-in 0; x5/x25 are the lowest limbs
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48); // NOTE(review): parameters are uint64_t but feed a 32-bit intrinsic; presumably each holds one 32-bit limb -- confirm with caller
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75); // x76 is the carry out of the highest limb add
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffff7, &x78); // borrow chain starts with borrow-in 0; the constant limbs, low limb first, spell 2^321 - 9 (presumably the field modulus)
+{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
+{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
+{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x1, &x108);
+{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_); // folds the add carry x76 into the borrow chain; only the final borrow x112 is used, the difference written to _ is ignored
+{ uint32_t x113 = cmovznz(x112, x108, x75); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x112 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x114 = cmovznz(x112, x105, x72);
+{ uint32_t x115 = cmovznz(x112, x102, x69);
+{ uint32_t x116 = cmovznz(x112, x99, x66);
+{ uint32_t x117 = cmovznz(x112, x96, x63);
+{ uint32_t x118 = cmovznz(x112, x93, x60);
+{ uint32_t x119 = cmovznz(x112, x90, x57);
+{ uint32_t x120 = cmovznz(x112, x87, x54);
+{ uint32_t x121 = cmovznz(x112, x84, x51);
+{ uint32_t x122 = cmovznz(x112, x81, x48);
+{ uint32_t x123 = cmovznz(x112, x78, x45);
+out[0] = x113; // out[0] receives the highest limb (selected from x108/x75), matching the highest-limb-first parameter order
+out[1] = x114;
+out[2] = x115;
+out[3] = x116;
+out[4] = x117;
+out[5] = x118;
+out[6] = x119;
+out[7] = x120;
+out[8] = x121;
+out[9] = x122;
+out[10] = x123; // out[10] receives the lowest limb (selected from x78/x45)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[11];
diff --git a/src/Specific/montgomery32_2e321m9/feadd.h b/src/Specific/montgomery32_2e321m9/feadd.h
new file mode 100644
index 000000000..ecd47fd4a
--- /dev/null
+++ b/src/Specific/montgomery32_2e321m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25);
diff --git a/src/Specific/montgomery32_2e321m9/fenz.c b/src/Specific/montgomery32_2e321m9/fenz.c
new file mode 100644
index 000000000..9e77ab0f6
--- /dev/null
+++ b/src/Specific/montgomery32_2e321m9/fenz.c
@@ -0,0 +1,32 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: bitwise-ORs all eleven inputs together and stores the 32-bit result in out[0].
+{ uint32_t x21 = (x20 | x19); // the OR is evaluated on uint64_t operands and then narrowed to uint32_t by the declaration
+{ uint32_t x22 = (x18 | x21);
+{ uint32_t x23 = (x16 | x22);
+{ uint32_t x24 = (x14 | x23);
+{ uint32_t x25 = (x12 | x24);
+{ uint32_t x26 = (x10 | x25);
+{ uint32_t x27 = (x8 | x26);
+{ uint32_t x28 = (x6 | x27);
+{ uint32_t x29 = (x4 | x28);
+{ uint32_t x30 = (x2 | x29);
+out[0] = x30; // zero exactly when the low 32 bits of every input are zero; any bits above bit 31 of an input are dropped by the narrowing
+}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e321m9/fenz.h b/src/Specific/montgomery32_2e321m9/fenz.h
new file mode 100644
index 000000000..c835272c1
--- /dev/null
+++ b/src/Specific/montgomery32_2e321m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e322m2e161m1/feadd.c b/src/Specific/montgomery32_2e322m2e161m1/feadd.c
new file mode 100644
index 000000000..ab2d20442
--- /dev/null
+++ b/src/Specific/montgomery32_2e322m2e161m1/feadd.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25) // feadd: 11-limb add with carry of (x22,x23,x21,...,x5) and (x42,x43,x41,...,x25), then an 11-limb subtract of a fixed constant, then a per-limb select between sum and difference; writes out[0..10].
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45); // carry chain starts here with carry-in 0; x5/x25 are the lowest limbs
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48); // NOTE(review): parameters are uint64_t but feed a 32-bit intrinsic; presumably each holds one 32-bit limb -- confirm with caller
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75); // x76 is the carry out of the highest limb add
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xffffffff, &x78); // borrow chain starts with borrow-in 0; the constant limbs, low limb first, spell 2^322 - 2^161 - 1 (presumably the field modulus)
+{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81);
+{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xfffffffd, &x93); // this limb covers bits 160..191, so 0xfffffffd carries the -2^161 term
+{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x3, &x108);
+{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_); // folds the add carry x76 into the borrow chain; only the final borrow x112 is used, the difference written to _ is ignored
+{ uint32_t x113 = cmovznz(x112, x108, x75); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x112 == 0 and the 3rd otherwise -- TODO confirm
+{ uint32_t x114 = cmovznz(x112, x105, x72);
+{ uint32_t x115 = cmovznz(x112, x102, x69);
+{ uint32_t x116 = cmovznz(x112, x99, x66);
+{ uint32_t x117 = cmovznz(x112, x96, x63);
+{ uint32_t x118 = cmovznz(x112, x93, x60);
+{ uint32_t x119 = cmovznz(x112, x90, x57);
+{ uint32_t x120 = cmovznz(x112, x87, x54);
+{ uint32_t x121 = cmovznz(x112, x84, x51);
+{ uint32_t x122 = cmovznz(x112, x81, x48);
+{ uint32_t x123 = cmovznz(x112, x78, x45);
+out[0] = x113; // out[0] receives the highest limb (selected from x108/x75), matching the highest-limb-first parameter order
+out[1] = x114;
+out[2] = x115;
+out[3] = x116;
+out[4] = x117;
+out[5] = x118;
+out[6] = x119;
+out[7] = x120;
+out[8] = x121;
+out[9] = x122;
+out[10] = x123; // out[10] receives the lowest limb (selected from x78/x45)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[11];
diff --git a/src/Specific/montgomery32_2e322m2e161m1/feadd.h b/src/Specific/montgomery32_2e322m2e161m1/feadd.h
new file mode 100644
index 000000000..ecd47fd4a
--- /dev/null
+++ b/src/Specific/montgomery32_2e322m2e161m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25);
diff --git a/src/Specific/montgomery32_2e322m2e161m1/fenz.c b/src/Specific/montgomery32_2e322m2e161m1/fenz.c
new file mode 100644
index 000000000..9e77ab0f6
--- /dev/null
+++ b/src/Specific/montgomery32_2e322m2e161m1/fenz.c
@@ -0,0 +1,32 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: bitwise-ORs all eleven inputs together and stores the 32-bit result in out[0].
+{ uint32_t x21 = (x20 | x19); // the OR is evaluated on uint64_t operands and then narrowed to uint32_t by the declaration
+{ uint32_t x22 = (x18 | x21);
+{ uint32_t x23 = (x16 | x22);
+{ uint32_t x24 = (x14 | x23);
+{ uint32_t x25 = (x12 | x24);
+{ uint32_t x26 = (x10 | x25);
+{ uint32_t x27 = (x8 | x26);
+{ uint32_t x28 = (x6 | x27);
+{ uint32_t x29 = (x4 | x28);
+{ uint32_t x30 = (x2 | x29);
+out[0] = x30; // zero exactly when the low 32 bits of every input are zero; any bits above bit 31 of an input are dropped by the narrowing
+}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e322m2e161m1/fenz.h b/src/Specific/montgomery32_2e322m2e161m1/fenz.h
new file mode 100644
index 000000000..c835272c1
--- /dev/null
+++ b/src/Specific/montgomery32_2e322m2e161m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e336m17/feadd.c b/src/Specific/montgomery32_2e336m17/feadd.c
new file mode 100644
index 000000000..98051c22b
--- /dev/null
+++ b/src/Specific/montgomery32_2e336m17/feadd.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25) // Adds the 11-word operands (x22..x5) and (x42..x25) with carry, also computes that sum minus the constant 2^336 - 17, and stores one of the two results (picked by cmovznz) in out[0..10].
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45); // carry chain starts with carry-in 0 at the LAST parameter of each operand (x5, x25); the intrinsic takes 32-bit operands, so the uint64_t arguments are implicitly narrowed
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48); // each step takes the previous carry-out (here x46) as its carry-in
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75); // last word pair; x76 is the carry out of the whole 11-word sum
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xffffffef, &x78); // borrow chain: subtract the constant word by word, lowest word 0xffffffef = 2^32 - 17
+{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81); // the nine middle words of the constant are all ones
+{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
+{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0xffff, &x108); // top word 0xffff: 10*32 + 16 = 336 bits, so the constant is 2^336 - 17
+{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_); // the difference word is discarded into `_`; only the final borrow x112 is kept (set when the sum, including carry x76, is below the constant)
+{ uint32_t x113 = cmovznz(x112, x108, x75); // cmovznz comes from liblow.h (not shown here); presumably yields x108 when x112 == 0 and x75 otherwise - TODO confirm
+{ uint32_t x114 = cmovznz(x112, x105, x72); // the same flag x112 drives the selection for every remaining word
+{ uint32_t x115 = cmovznz(x112, x102, x69);
+{ uint32_t x116 = cmovznz(x112, x99, x66);
+{ uint32_t x117 = cmovznz(x112, x96, x63);
+{ uint32_t x118 = cmovznz(x112, x93, x60);
+{ uint32_t x119 = cmovznz(x112, x90, x57);
+{ uint32_t x120 = cmovznz(x112, x87, x54);
+{ uint32_t x121 = cmovznz(x112, x84, x51);
+{ uint32_t x122 = cmovznz(x112, x81, x48);
+{ uint32_t x123 = cmovznz(x112, x78, x45);
+out[0] = x113; // out[0] receives the word from the END of the carry chain (the x22/x42 pair) ...
+out[1] = x114;
+out[2] = x115;
+out[3] = x116;
+out[4] = x117;
+out[5] = x118;
+out[6] = x119;
+out[7] = x120;
+out[8] = x121;
+out[9] = x122;
+out[10] = x123; // ... and out[10] the word from its start (the x5/x25 pair), mirroring the parameter order
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[11];
diff --git a/src/Specific/montgomery32_2e336m17/feadd.h b/src/Specific/montgomery32_2e336m17/feadd.h
new file mode 100644
index 000000000..ecd47fd4a
--- /dev/null
+++ b/src/Specific/montgomery32_2e336m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25); // Prototype of the feadd defined in this directory's feadd.c; out needs room for 11 uint64_t (out[0..10] are written).
diff --git a/src/Specific/montgomery32_2e336m17/fenz.c b/src/Specific/montgomery32_2e336m17/fenz.c
new file mode 100644
index 000000000..9e77ab0f6
--- /dev/null
+++ b/src/Specific/montgomery32_2e336m17/fenz.c
@@ -0,0 +1,32 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores in out[0] the bitwise OR of all eleven inputs, narrowed to 32 bits.
+{ uint32_t x21 = (x20 | x19); // the 64-bit OR is narrowed to uint32_t here, so only the low 32 bits of each input can contribute
+{ uint32_t x22 = (x18 | x21); // every following line folds one more input into the running OR
+{ uint32_t x23 = (x16 | x22);
+{ uint32_t x24 = (x14 | x23);
+{ uint32_t x25 = (x12 | x24); // NOTE(review): inputs are presumably 32-bit words held in uint64_t (directory is montgomery32_*) - confirm no caller passes wider values
+{ uint32_t x26 = (x10 | x25);
+{ uint32_t x27 = (x8 | x26);
+{ uint32_t x28 = (x6 | x27);
+{ uint32_t x29 = (x4 | x28);
+{ uint32_t x30 = (x2 | x29); // x30 == 0 exactly when the low 32 bits of every input are zero
+out[0] = x30; // zero-extended into the single 64-bit output slot
+}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e336m17/fenz.h b/src/Specific/montgomery32_2e336m17/fenz.h
new file mode 100644
index 000000000..c835272c1
--- /dev/null
+++ b/src/Specific/montgomery32_2e336m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of the fenz defined in this directory's fenz.c; out needs room for one uint64_t (only out[0] is written).
diff --git a/src/Specific/montgomery32_2e336m3/feadd.c b/src/Specific/montgomery32_2e336m3/feadd.c
new file mode 100644
index 000000000..2b9e8f492
--- /dev/null
+++ b/src/Specific/montgomery32_2e336m3/feadd.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25) // Adds the 11-word operands (x22..x5) and (x42..x25) with carry, also computes that sum minus the constant 2^336 - 3, and stores one of the two results (picked by cmovznz) in out[0..10].
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45); // carry chain starts with carry-in 0 at the LAST parameter of each operand (x5, x25); the intrinsic takes 32-bit operands, so the uint64_t arguments are implicitly narrowed
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48); // each step takes the previous carry-out (here x46) as its carry-in
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75); // last word pair; x76 is the carry out of the whole 11-word sum
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffffd, &x78); // borrow chain: subtract the constant word by word, lowest word 0xfffffffd = 2^32 - 3
+{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81); // the nine middle words of the constant are all ones
+{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
+{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0xffff, &x108); // top word 0xffff: 10*32 + 16 = 336 bits, so the constant is 2^336 - 3
+{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_); // the difference word is discarded into `_`; only the final borrow x112 is kept (set when the sum, including carry x76, is below the constant)
+{ uint32_t x113 = cmovznz(x112, x108, x75); // cmovznz comes from liblow.h (not shown here); presumably yields x108 when x112 == 0 and x75 otherwise - TODO confirm
+{ uint32_t x114 = cmovznz(x112, x105, x72); // the same flag x112 drives the selection for every remaining word
+{ uint32_t x115 = cmovznz(x112, x102, x69);
+{ uint32_t x116 = cmovznz(x112, x99, x66);
+{ uint32_t x117 = cmovznz(x112, x96, x63);
+{ uint32_t x118 = cmovznz(x112, x93, x60);
+{ uint32_t x119 = cmovznz(x112, x90, x57);
+{ uint32_t x120 = cmovznz(x112, x87, x54);
+{ uint32_t x121 = cmovznz(x112, x84, x51);
+{ uint32_t x122 = cmovznz(x112, x81, x48);
+{ uint32_t x123 = cmovznz(x112, x78, x45);
+out[0] = x113; // out[0] receives the word from the END of the carry chain (the x22/x42 pair) ...
+out[1] = x114;
+out[2] = x115;
+out[3] = x116;
+out[4] = x117;
+out[5] = x118;
+out[6] = x119;
+out[7] = x120;
+out[8] = x121;
+out[9] = x122;
+out[10] = x123; // ... and out[10] the word from its start (the x5/x25 pair), mirroring the parameter order
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[11];
diff --git a/src/Specific/montgomery32_2e336m3/feadd.h b/src/Specific/montgomery32_2e336m3/feadd.h
new file mode 100644
index 000000000..ecd47fd4a
--- /dev/null
+++ b/src/Specific/montgomery32_2e336m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25); // Prototype of the feadd defined in this directory's feadd.c; out needs room for 11 uint64_t (out[0..10] are written).
diff --git a/src/Specific/montgomery32_2e336m3/fenz.c b/src/Specific/montgomery32_2e336m3/fenz.c
new file mode 100644
index 000000000..9e77ab0f6
--- /dev/null
+++ b/src/Specific/montgomery32_2e336m3/fenz.c
@@ -0,0 +1,32 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores in out[0] the bitwise OR of all eleven inputs, narrowed to 32 bits.
+{ uint32_t x21 = (x20 | x19); // the 64-bit OR is narrowed to uint32_t here, so only the low 32 bits of each input can contribute
+{ uint32_t x22 = (x18 | x21); // every following line folds one more input into the running OR
+{ uint32_t x23 = (x16 | x22);
+{ uint32_t x24 = (x14 | x23);
+{ uint32_t x25 = (x12 | x24); // NOTE(review): inputs are presumably 32-bit words held in uint64_t (directory is montgomery32_*) - confirm no caller passes wider values
+{ uint32_t x26 = (x10 | x25);
+{ uint32_t x27 = (x8 | x26);
+{ uint32_t x28 = (x6 | x27);
+{ uint32_t x29 = (x4 | x28);
+{ uint32_t x30 = (x2 | x29); // x30 == 0 exactly when the low 32 bits of every input are zero
+out[0] = x30; // zero-extended into the single 64-bit output slot
+}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e336m3/fenz.h b/src/Specific/montgomery32_2e336m3/fenz.h
new file mode 100644
index 000000000..c835272c1
--- /dev/null
+++ b/src/Specific/montgomery32_2e336m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of the fenz defined in this directory's fenz.c; out needs room for one uint64_t (only out[0] is written).
diff --git a/src/Specific/montgomery32_2e338m15/feadd.c b/src/Specific/montgomery32_2e338m15/feadd.c
new file mode 100644
index 000000000..6f3769ede
--- /dev/null
+++ b/src/Specific/montgomery32_2e338m15/feadd.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25) // Adds the 11-word operands (x22..x5) and (x42..x25) with carry, also computes that sum minus the constant 2^338 - 15, and stores one of the two results (picked by cmovznz) in out[0..10].
+{ uint32_t x45; uint8_t x46 = _addcarryx_u32(0x0, x5, x25, &x45); // carry chain starts with carry-in 0 at the LAST parameter of each operand (x5, x25); the intrinsic takes 32-bit operands, so the uint64_t arguments are implicitly narrowed
+{ uint32_t x48; uint8_t x49 = _addcarryx_u32(x46, x7, x27, &x48); // each step takes the previous carry-out (here x46) as its carry-in
+{ uint32_t x51; uint8_t x52 = _addcarryx_u32(x49, x9, x29, &x51);
+{ uint32_t x54; uint8_t x55 = _addcarryx_u32(x52, x11, x31, &x54);
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(x55, x13, x33, &x57);
+{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x15, x35, &x60);
+{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x17, x37, &x63);
+{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x19, x39, &x66);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x21, x41, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x23, x43, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x22, x42, &x75); // last word pair; x76 is the carry out of the whole 11-word sum
+{ uint32_t x78; uint8_t x79 = _subborrow_u32(0x0, x45, 0xfffffff1, &x78); // borrow chain: subtract the constant word by word, lowest word 0xfffffff1 = 2^32 - 15
+{ uint32_t x81; uint8_t x82 = _subborrow_u32(x79, x48, 0xffffffff, &x81); // the nine middle words of the constant are all ones
+{ uint32_t x84; uint8_t x85 = _subborrow_u32(x82, x51, 0xffffffff, &x84);
+{ uint32_t x87; uint8_t x88 = _subborrow_u32(x85, x54, 0xffffffff, &x87);
+{ uint32_t x90; uint8_t x91 = _subborrow_u32(x88, x57, 0xffffffff, &x90);
+{ uint32_t x93; uint8_t x94 = _subborrow_u32(x91, x60, 0xffffffff, &x93);
+{ uint32_t x96; uint8_t x97 = _subborrow_u32(x94, x63, 0xffffffff, &x96);
+{ uint32_t x99; uint8_t x100 = _subborrow_u32(x97, x66, 0xffffffff, &x99);
+{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x69, 0xffffffff, &x102);
+{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x72, 0xffffffff, &x105);
+{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x75, 0x3ffff, &x108); // top word 0x3ffff: 10*32 + 18 = 338 bits, so the constant is 2^338 - 15
+{ uint32_t _; uint8_t x112 = _subborrow_u32(x109, x76, 0x0, &_); // the difference word is discarded into `_`; only the final borrow x112 is kept (set when the sum, including carry x76, is below the constant)
+{ uint32_t x113 = cmovznz(x112, x108, x75); // cmovznz comes from liblow.h (not shown here); presumably yields x108 when x112 == 0 and x75 otherwise - TODO confirm
+{ uint32_t x114 = cmovznz(x112, x105, x72); // the same flag x112 drives the selection for every remaining word
+{ uint32_t x115 = cmovznz(x112, x102, x69);
+{ uint32_t x116 = cmovznz(x112, x99, x66);
+{ uint32_t x117 = cmovznz(x112, x96, x63);
+{ uint32_t x118 = cmovznz(x112, x93, x60);
+{ uint32_t x119 = cmovznz(x112, x90, x57);
+{ uint32_t x120 = cmovznz(x112, x87, x54);
+{ uint32_t x121 = cmovznz(x112, x84, x51);
+{ uint32_t x122 = cmovznz(x112, x81, x48);
+{ uint32_t x123 = cmovznz(x112, x78, x45);
+out[0] = x113; // out[0] receives the word from the END of the carry chain (the x22/x42 pair) ...
+out[1] = x114;
+out[2] = x115;
+out[3] = x116;
+out[4] = x117;
+out[5] = x118;
+out[6] = x119;
+out[7] = x120;
+out[8] = x121;
+out[9] = x122;
+out[10] = x123; // ... and out[10] the word from its start (the x5/x25 pair), mirroring the parameter order
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[11];
diff --git a/src/Specific/montgomery32_2e338m15/feadd.h b/src/Specific/montgomery32_2e338m15/feadd.h
new file mode 100644
index 000000000..ecd47fd4a
--- /dev/null
+++ b/src/Specific/montgomery32_2e338m15/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25); // Prototype of the feadd defined in this directory's feadd.c; out needs room for 11 uint64_t (out[0..10] are written).
diff --git a/src/Specific/montgomery32_2e338m15/fenz.c b/src/Specific/montgomery32_2e338m15/fenz.c
new file mode 100644
index 000000000..9e77ab0f6
--- /dev/null
+++ b/src/Specific/montgomery32_2e338m15/fenz.c
@@ -0,0 +1,32 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores in out[0] the bitwise OR of all eleven inputs, narrowed to 32 bits.
+{ uint32_t x21 = (x20 | x19); // the 64-bit OR is narrowed to uint32_t here, so only the low 32 bits of each input can contribute
+{ uint32_t x22 = (x18 | x21); // every following line folds one more input into the running OR
+{ uint32_t x23 = (x16 | x22);
+{ uint32_t x24 = (x14 | x23);
+{ uint32_t x25 = (x12 | x24); // NOTE(review): inputs are presumably 32-bit words held in uint64_t (directory is montgomery32_*) - confirm no caller passes wider values
+{ uint32_t x26 = (x10 | x25);
+{ uint32_t x27 = (x8 | x26);
+{ uint32_t x28 = (x6 | x27);
+{ uint32_t x29 = (x4 | x28);
+{ uint32_t x30 = (x2 | x29); // x30 == 0 exactly when the low 32 bits of every input are zero
+out[0] = x30; // zero-extended into the single 64-bit output slot
+}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e338m15/fenz.h b/src/Specific/montgomery32_2e338m15/fenz.h
new file mode 100644
index 000000000..c835272c1
--- /dev/null
+++ b/src/Specific/montgomery32_2e338m15/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of the fenz defined in this directory's fenz.c; out needs room for one uint64_t (only out[0] is written).
diff --git a/src/Specific/montgomery32_2e369m25/feadd.c b/src/Specific/montgomery32_2e369m25/feadd.c
new file mode 100644
index 000000000..340cf25c9
--- /dev/null
+++ b/src/Specific/montgomery32_2e369m25/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Adds the 12-word operands (x24..x5) and (x46..x27) with carry, also computes that sum minus the constant 2^369 - 25, and stores one of the two results (picked by cmovznz) in out[0..11].
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // carry chain starts with carry-in 0 at the LAST parameter of each operand (x5, x27); the intrinsic takes 32-bit operands, so the uint64_t arguments are implicitly narrowed
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52); // each step takes the previous carry-out (here x50) as its carry-in
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // last word pair; x83 is the carry out of the whole 12-word sum
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffe7, &x85); // borrow chain: subtract the constant word by word, lowest word 0xffffffe7 = 2^32 - 25
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88); // the ten middle words of the constant are all ones
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x1ffff, &x118); // top word 0x1ffff: 11*32 + 17 = 369 bits, so the constant is 2^369 - 25
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // the difference word is discarded into `_`; only the final borrow x122 is kept (set when the sum, including carry x83, is below the constant)
+{ uint32_t x123 = cmovznz(x122, x118, x82); // cmovznz comes from liblow.h (not shown here); presumably yields x118 when x122 == 0 and x82 otherwise - TODO confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79); // the same flag x122 drives the selection for every remaining word
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // out[0] receives the word from the END of the carry chain (the x24/x46 pair) ...
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // ... and out[11] the word from its start (the x5/x27 pair), mirroring the parameter order
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e369m25/feadd.h b/src/Specific/montgomery32_2e369m25/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e369m25/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Prototype of the feadd defined in this directory's feadd.c; out needs room for 12 uint64_t (out[0..11] are written).
diff --git a/src/Specific/montgomery32_2e369m25/fenz.c b/src/Specific/montgomery32_2e369m25/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e369m25/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores in out[0] the bitwise OR of all twelve inputs, narrowed to 32 bits.
+{ uint32_t x23 = (x22 | x21); // the 64-bit OR is narrowed to uint32_t here, so only the low 32 bits of each input can contribute
+{ uint32_t x24 = (x20 | x23); // every following line folds one more input into the running OR
+{ uint32_t x25 = (x18 | x24);
+{ uint32_t x26 = (x16 | x25);
+{ uint32_t x27 = (x14 | x26); // NOTE(review): inputs are presumably 32-bit words held in uint64_t (directory is montgomery32_*) - confirm no caller passes wider values
+{ uint32_t x28 = (x12 | x27);
+{ uint32_t x29 = (x10 | x28);
+{ uint32_t x30 = (x8 | x29);
+{ uint32_t x31 = (x6 | x30);
+{ uint32_t x32 = (x4 | x31);
+{ uint32_t x33 = (x2 | x32); // x33 == 0 exactly when the low 32 bits of every input are zero
+out[0] = x33; // zero-extended into the single 64-bit output slot
+}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e369m25/fenz.h b/src/Specific/montgomery32_2e369m25/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e369m25/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of the fenz defined in this directory's fenz.c; out needs room for one uint64_t (only out[0] is written).
diff --git a/src/Specific/montgomery32_2e379m19/feadd.c b/src/Specific/montgomery32_2e379m19/feadd.c
new file mode 100644
index 000000000..b26447d8e
--- /dev/null
+++ b/src/Specific/montgomery32_2e379m19/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Adds the 12-word operands (x24..x5) and (x46..x27) with carry, also computes that sum minus the constant 2^379 - 19, and stores one of the two results (picked by cmovznz) in out[0..11].
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // carry chain starts with carry-in 0 at the LAST parameter of each operand (x5, x27); the intrinsic takes 32-bit operands, so the uint64_t arguments are implicitly narrowed
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52); // each step takes the previous carry-out (here x50) as its carry-in
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // last word pair; x83 is the carry out of the whole 12-word sum
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffed, &x85); // borrow chain: subtract the constant word by word, lowest word 0xffffffed = 2^32 - 19
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88); // the ten middle words of the constant are all ones
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7ffffff, &x118); // top word 0x7ffffff: 11*32 + 27 = 379 bits, so the constant is 2^379 - 19
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // the difference word is discarded into `_`; only the final borrow x122 is kept (set when the sum, including carry x83, is below the constant)
+{ uint32_t x123 = cmovznz(x122, x118, x82); // cmovznz comes from liblow.h (not shown here); presumably yields x118 when x122 == 0 and x82 otherwise - TODO confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79); // the same flag x122 drives the selection for every remaining word
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // out[0] receives the word from the END of the carry chain (the x24/x46 pair) ...
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // ... and out[11] the word from its start (the x5/x27 pair), mirroring the parameter order
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e379m19/feadd.h b/src/Specific/montgomery32_2e379m19/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e379m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Prototype of the feadd defined in this directory's feadd.c; out needs room for 12 uint64_t (out[0..11] are written).
diff --git a/src/Specific/montgomery32_2e379m19/fenz.c b/src/Specific/montgomery32_2e379m19/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e379m19/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores in out[0] the bitwise OR of all twelve inputs, narrowed to 32 bits.
+{ uint32_t x23 = (x22 | x21); // the 64-bit OR is narrowed to uint32_t here, so only the low 32 bits of each input can contribute
+{ uint32_t x24 = (x20 | x23); // every following line folds one more input into the running OR
+{ uint32_t x25 = (x18 | x24);
+{ uint32_t x26 = (x16 | x25);
+{ uint32_t x27 = (x14 | x26); // NOTE(review): inputs are presumably 32-bit words held in uint64_t (directory is montgomery32_*) - confirm no caller passes wider values
+{ uint32_t x28 = (x12 | x27);
+{ uint32_t x29 = (x10 | x28);
+{ uint32_t x30 = (x8 | x29);
+{ uint32_t x31 = (x6 | x30);
+{ uint32_t x32 = (x4 | x31);
+{ uint32_t x33 = (x2 | x32); // x33 == 0 exactly when the low 32 bits of every input are zero
+out[0] = x33; // zero-extended into the single 64-bit output slot
+}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e379m19/fenz.h b/src/Specific/montgomery32_2e379m19/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e379m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of the fenz defined in this directory's fenz.c; out needs room for one uint64_t (only out[0] is written).
diff --git a/src/Specific/montgomery32_2e382m105/feadd.c b/src/Specific/montgomery32_2e382m105/feadd.c
new file mode 100644
index 000000000..3a2ec01f0
--- /dev/null
+++ b/src/Specific/montgomery32_2e382m105/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // computes the 12-limb sum of the two operands (arguments 2-13 and 14-25, each listed most significant limb first) and that sum minus a constant modulus, then stores one of the two in out[0..11] as selected by the final borrow
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // add-with-carry chain over the 12 limb pairs, least significant pair first; the uint64_t arguments are narrowed to the intrinsic's 32-bit operands
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // x83 is the carry out of the top limb
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffff97, &x85); // borrow chain subtracting the modulus from the sum; the constants, read least significant limb first, are the 32-bit limbs of 2^382 - 105
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x3fffffff, &x118); // top limb of the modulus
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // folds the carry x83 into the borrow chain; only the final borrow x122 is kept, the difference lands in the throwaway variable _
+{ uint32_t x123 = cmovznz(x122, x118, x82); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably it returns its 2nd argument when x122 == 0 and its 3rd otherwise, i.e. the reduced limb unless the subtraction borrowed - confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79);
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // results are stored most significant limb first: out[0] comes from the last step of the chains
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // out[11] comes from the first (least significant) step
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e382m105/feadd.h b/src/Specific/montgomery32_2e382m105/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e382m105/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // implemented in feadd.c alongside this header: adds the two 12-limb operands (arguments 2-13 and 14-25) and writes 12 result limbs to out[0..11]
diff --git a/src/Specific/montgomery32_2e382m105/fenz.c b/src/Specific/montgomery32_2e382m105/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e382m105/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t acc = (uint32_t)(x22 | x21); // running OR of the inputs; every step keeps only the low 32 bits
+  acc = (uint32_t)(x20 | acc);
+  acc = (uint32_t)(x18 | acc);
+  acc = (uint32_t)(x16 | acc);
+  acc = (uint32_t)(x14 | acc);
+  acc = (uint32_t)(x12 | acc);
+  acc = (uint32_t)(x10 | acc);
+  acc = (uint32_t)(x8 | acc);
+  acc = (uint32_t)(x6 | acc);
+  acc = (uint32_t)(x4 | acc);
+  acc = (uint32_t)(x2 | acc);
+  out[0] = acc; // 0 exactly when the low 32 bits of all twelve inputs are 0; the value is not normalised to 0/1
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e382m105/fenz.h b/src/Specific/montgomery32_2e382m105/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e382m105/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // implemented in fenz.c alongside this header: out[0] receives the OR of the twelve limb arguments, truncated to 32 bits
diff --git a/src/Specific/montgomery32_2e383m187/feadd.c b/src/Specific/montgomery32_2e383m187/feadd.c
new file mode 100644
index 000000000..da64dfe0c
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m187/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // computes the 12-limb sum of the two operands (arguments 2-13 and 14-25, each listed most significant limb first) and that sum minus a constant modulus, then stores one of the two in out[0..11] as selected by the final borrow
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // add-with-carry chain over the 12 limb pairs, least significant pair first; the uint64_t arguments are narrowed to the intrinsic's 32-bit operands
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // x83 is the carry out of the top limb
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffff45, &x85); // borrow chain subtracting the modulus from the sum; the constants, read least significant limb first, are the 32-bit limbs of 2^383 - 187
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118); // top limb of the modulus
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // folds the carry x83 into the borrow chain; only the final borrow x122 is kept, the difference lands in the throwaway variable _
+{ uint32_t x123 = cmovznz(x122, x118, x82); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably it returns its 2nd argument when x122 == 0 and its 3rd otherwise, i.e. the reduced limb unless the subtraction borrowed - confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79);
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // results are stored most significant limb first: out[0] comes from the last step of the chains
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // out[11] comes from the first (least significant) step
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e383m187/feadd.h b/src/Specific/montgomery32_2e383m187/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m187/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // implemented in feadd.c alongside this header: adds the two 12-limb operands (arguments 2-13 and 14-25) and writes 12 result limbs to out[0..11]
diff --git a/src/Specific/montgomery32_2e383m187/fenz.c b/src/Specific/montgomery32_2e383m187/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m187/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t acc = (uint32_t)(x22 | x21); // running OR of the inputs; every step keeps only the low 32 bits
+  acc = (uint32_t)(x20 | acc);
+  acc = (uint32_t)(x18 | acc);
+  acc = (uint32_t)(x16 | acc);
+  acc = (uint32_t)(x14 | acc);
+  acc = (uint32_t)(x12 | acc);
+  acc = (uint32_t)(x10 | acc);
+  acc = (uint32_t)(x8 | acc);
+  acc = (uint32_t)(x6 | acc);
+  acc = (uint32_t)(x4 | acc);
+  acc = (uint32_t)(x2 | acc);
+  out[0] = acc; // 0 exactly when the low 32 bits of all twelve inputs are 0; the value is not normalised to 0/1
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e383m187/fenz.h b/src/Specific/montgomery32_2e383m187/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m187/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // implemented in fenz.c alongside this header: out[0] receives the OR of the twelve limb arguments, truncated to 32 bits
diff --git a/src/Specific/montgomery32_2e383m31/feadd.c b/src/Specific/montgomery32_2e383m31/feadd.c
new file mode 100644
index 000000000..39320467e
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m31/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // computes the 12-limb sum of the two operands (arguments 2-13 and 14-25, each listed most significant limb first) and that sum minus a constant modulus, then stores one of the two in out[0..11] as selected by the final borrow
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // add-with-carry chain over the 12 limb pairs, least significant pair first; the uint64_t arguments are narrowed to the intrinsic's 32-bit operands
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // x83 is the carry out of the top limb
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffe1, &x85); // borrow chain subtracting the modulus from the sum; the constants, read least significant limb first, are the 32-bit limbs of 2^383 - 31
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118); // top limb of the modulus
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // folds the carry x83 into the borrow chain; only the final borrow x122 is kept, the difference lands in the throwaway variable _
+{ uint32_t x123 = cmovznz(x122, x118, x82); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably it returns its 2nd argument when x122 == 0 and its 3rd otherwise, i.e. the reduced limb unless the subtraction borrowed - confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79);
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // results are stored most significant limb first: out[0] comes from the last step of the chains
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // out[11] comes from the first (least significant) step
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e383m31/feadd.h b/src/Specific/montgomery32_2e383m31/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m31/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // implemented in feadd.c alongside this header: adds the two 12-limb operands (arguments 2-13 and 14-25) and writes 12 result limbs to out[0..11]
diff --git a/src/Specific/montgomery32_2e383m31/fenz.c b/src/Specific/montgomery32_2e383m31/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m31/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t acc = (uint32_t)(x22 | x21); // running OR of the inputs; every step keeps only the low 32 bits
+  acc = (uint32_t)(x20 | acc);
+  acc = (uint32_t)(x18 | acc);
+  acc = (uint32_t)(x16 | acc);
+  acc = (uint32_t)(x14 | acc);
+  acc = (uint32_t)(x12 | acc);
+  acc = (uint32_t)(x10 | acc);
+  acc = (uint32_t)(x8 | acc);
+  acc = (uint32_t)(x6 | acc);
+  acc = (uint32_t)(x4 | acc);
+  acc = (uint32_t)(x2 | acc);
+  out[0] = acc; // 0 exactly when the low 32 bits of all twelve inputs are 0; the value is not normalised to 0/1
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e383m31/fenz.h b/src/Specific/montgomery32_2e383m31/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m31/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // implemented in fenz.c alongside this header: out[0] receives the OR of the twelve limb arguments, truncated to 32 bits
diff --git a/src/Specific/montgomery32_2e383m421/feadd.c b/src/Specific/montgomery32_2e383m421/feadd.c
new file mode 100644
index 000000000..c299cb16e
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m421/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // computes the 12-limb sum of the two operands (arguments 2-13 and 14-25, each listed most significant limb first) and that sum minus a constant modulus, then stores one of the two in out[0..11] as selected by the final borrow
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // add-with-carry chain over the 12 limb pairs, least significant pair first; the uint64_t arguments are narrowed to the intrinsic's 32-bit operands
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // x83 is the carry out of the top limb
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xfffffe5b, &x85); // borrow chain subtracting the modulus from the sum; the constants, read least significant limb first, are the 32-bit limbs of 2^383 - 421
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0x7fffffff, &x118); // top limb of the modulus
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // folds the carry x83 into the borrow chain; only the final borrow x122 is kept, the difference lands in the throwaway variable _
+{ uint32_t x123 = cmovznz(x122, x118, x82); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably it returns its 2nd argument when x122 == 0 and its 3rd otherwise, i.e. the reduced limb unless the subtraction borrowed - confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79);
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // results are stored most significant limb first: out[0] comes from the last step of the chains
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // out[11] comes from the first (least significant) step
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e383m421/feadd.h b/src/Specific/montgomery32_2e383m421/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m421/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // implemented in feadd.c alongside this header: adds the two 12-limb operands (arguments 2-13 and 14-25) and writes 12 result limbs to out[0..11]
diff --git a/src/Specific/montgomery32_2e383m421/fenz.c b/src/Specific/montgomery32_2e383m421/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m421/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t acc = (uint32_t)(x22 | x21); // running OR of the inputs; every step keeps only the low 32 bits
+  acc = (uint32_t)(x20 | acc);
+  acc = (uint32_t)(x18 | acc);
+  acc = (uint32_t)(x16 | acc);
+  acc = (uint32_t)(x14 | acc);
+  acc = (uint32_t)(x12 | acc);
+  acc = (uint32_t)(x10 | acc);
+  acc = (uint32_t)(x8 | acc);
+  acc = (uint32_t)(x6 | acc);
+  acc = (uint32_t)(x4 | acc);
+  acc = (uint32_t)(x2 | acc);
+  out[0] = acc; // 0 exactly when the low 32 bits of all twelve inputs are 0; the value is not normalised to 0/1
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e383m421/fenz.h b/src/Specific/montgomery32_2e383m421/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e383m421/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // implemented in fenz.c alongside this header: out[0] receives the OR of the twelve limb arguments, truncated to 32 bits
diff --git a/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c
new file mode 100644
index 000000000..a39adecf1
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // computes the 12-limb sum of the two operands (arguments 2-13 and 14-25, each listed most significant limb first) and that sum minus a constant modulus, then stores one of the two in out[0..11] as selected by the final borrow
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // add-with-carry chain over the 12 limb pairs, least significant pair first; the uint64_t arguments are narrowed to the intrinsic's 32-bit operands
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52);
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // x83 is the carry out of the top limb
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85); // borrow chain subtracting the modulus from the sum; the constants, read least significant limb first, are the 32-bit limbs of 2^384 - 2^128 - 2^96 + 2^32 - 1
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0x0, &x88);
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0x0, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xfffffffe, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xffffffff, &x118); // top limb of the modulus
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // folds the carry x83 into the borrow chain; only the final borrow x122 is kept, the difference lands in the throwaway variable _
+{ uint32_t x123 = cmovznz(x122, x118, x82); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably it returns its 2nd argument when x122 == 0 and its 3rd otherwise, i.e. the reduced limb unless the subtraction borrowed - confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79);
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // results are stored most significant limb first: out[0] comes from the last step of the chains
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // out[11] comes from the first (least significant) step
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.h b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // implemented in feadd.c alongside this header: adds the two 12-limb operands (arguments 2-13 and 14-25) and writes 12 result limbs to out[0..11]
diff --git a/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t acc = (uint32_t)(x22 | x21); // running OR of the inputs; every step keeps only the low 32 bits
+  acc = (uint32_t)(x20 | acc);
+  acc = (uint32_t)(x18 | acc);
+  acc = (uint32_t)(x16 | acc);
+  acc = (uint32_t)(x14 | acc);
+  acc = (uint32_t)(x12 | acc);
+  acc = (uint32_t)(x10 | acc);
+  acc = (uint32_t)(x8 | acc);
+  acc = (uint32_t)(x6 | acc);
+  acc = (uint32_t)(x4 | acc);
+  acc = (uint32_t)(x2 | acc);
+  out[0] = acc; // 0 exactly when the low 32 bits of all twelve inputs are 0; the value is not normalised to 0/1
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.h b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m2e128m2e96p2e32m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // implemented in fenz.c alongside this header: out[0] receives the OR of the twelve limb arguments, truncated to 32 bits
diff --git a/src/Specific/montgomery32_2e384m317/feadd.c b/src/Specific/montgomery32_2e384m317/feadd.c
new file mode 100644
index 000000000..a527b27f5
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m317/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Adds the operand in the first twelve value parameters to the operand in the last twelve, trial-subtracts a fixed constant, and writes twelve selected words to out[0..11].
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // start of the 32-bit carry chain (carry-in 0): x49 = sum word, x50 = carry out; the uint64_t arguments are implicitly narrowed to 32 bits by the call
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52); // each following add consumes the previous carry
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // last word of the chain; x83 is the carry out of the whole 12-word sum
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xfffffec3, &x85); // start of the borrow chain subtracting a constant from the sum; its words, in chain order, are 0xfffffec3 then eleven 0xffffffff, i.e. 2^384 - 317 (matches the directory name)
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xffffffff, &x118);
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // extends the borrow through the addition's carry x83; the difference word '_' is never read, only the final borrow x122 is used
+{ uint32_t x123 = cmovznz(x122, x118, x82); // cmovznz is declared in liblow.h (not shown); presumably returns its third argument (the plain sum word) when x122 is non-zero and its second (the subtracted word) otherwise - confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79);
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // out[0] receives the word from the last link of the carry chain
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // out[11] receives the word from the first link of the carry chain
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e384m317/feadd.h b/src/Specific/montgomery32_2e384m317/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m317/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Prototype for the definition in feadd.c of this directory: two 12-word operands passed by value, 12 result words written through out.
diff --git a/src/Specific/montgomery32_2e384m317/fenz.c b/src/Specific/montgomery32_2e384m317/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m317/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: ORs the twelve input words together and stores the result in out[0].
+{ uint32_t x23 = (x22 | x21); // every OR result is stored in a uint32_t, so bits above bit 31 of the 64-bit inputs are dropped; presumably each input already fits in 32 bits - confirm against callers
+{ uint32_t x24 = (x20 | x23); // x24..x33 fold the remaining inputs into the running OR, one input per line
+{ uint32_t x25 = (x18 | x24);
+{ uint32_t x26 = (x16 | x25);
+{ uint32_t x27 = (x14 | x26);
+{ uint32_t x28 = (x12 | x27);
+{ uint32_t x29 = (x10 | x28);
+{ uint32_t x30 = (x8 | x29);
+{ uint32_t x31 = (x6 | x30);
+{ uint32_t x32 = (x4 | x31);
+{ uint32_t x33 = (x2 | x32);
+out[0] = x33; // zero exactly when the low 32 bits of every input are zero; widened to uint64_t by the store
+}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e384m317/fenz.h b/src/Specific/montgomery32_2e384m317/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m317/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the definition in fenz.c of this directory: twelve input words, result written through out (out[0] only).
diff --git a/src/Specific/montgomery32_2e384m5x2e368m1/feadd.c b/src/Specific/montgomery32_2e384m5x2e368m1/feadd.c
new file mode 100644
index 000000000..06a36e89b
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m5x2e368m1/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Adds the operand in the first twelve value parameters to the operand in the last twelve, trial-subtracts a fixed constant, and writes twelve selected words to out[0..11].
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // start of the 32-bit carry chain (carry-in 0): x49 = sum word, x50 = carry out; the uint64_t arguments are implicitly narrowed to 32 bits by the call
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52); // each following add consumes the previous carry
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // last word of the chain; x83 is the carry out of the whole 12-word sum
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85); // start of the borrow chain subtracting a constant from the sum; its words, in chain order, are eleven 0xffffffff then 0xfffaffff, i.e. 2^384 - 5*2^368 - 1 (matches the directory name)
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xfffaffff, &x118); // the only constant word that differs from 0xffffffff
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // extends the borrow through the addition's carry x83; the difference word '_' is never read, only the final borrow x122 is used
+{ uint32_t x123 = cmovznz(x122, x118, x82); // cmovznz is declared in liblow.h (not shown); presumably returns its third argument (the plain sum word) when x122 is non-zero and its second (the subtracted word) otherwise - confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79);
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // out[0] receives the word from the last link of the carry chain
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // out[11] receives the word from the first link of the carry chain
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e384m5x2e368m1/feadd.h b/src/Specific/montgomery32_2e384m5x2e368m1/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m5x2e368m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Prototype for the definition in feadd.c of this directory: two 12-word operands passed by value, 12 result words written through out.
diff --git a/src/Specific/montgomery32_2e384m5x2e368m1/fenz.c b/src/Specific/montgomery32_2e384m5x2e368m1/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m5x2e368m1/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: ORs the twelve input words together and stores the result in out[0].
+{ uint32_t x23 = (x22 | x21); // every OR result is stored in a uint32_t, so bits above bit 31 of the 64-bit inputs are dropped; presumably each input already fits in 32 bits - confirm against callers
+{ uint32_t x24 = (x20 | x23); // x24..x33 fold the remaining inputs into the running OR, one input per line
+{ uint32_t x25 = (x18 | x24);
+{ uint32_t x26 = (x16 | x25);
+{ uint32_t x27 = (x14 | x26);
+{ uint32_t x28 = (x12 | x27);
+{ uint32_t x29 = (x10 | x28);
+{ uint32_t x30 = (x8 | x29);
+{ uint32_t x31 = (x6 | x30);
+{ uint32_t x32 = (x4 | x31);
+{ uint32_t x33 = (x2 | x32);
+out[0] = x33; // zero exactly when the low 32 bits of every input are zero; widened to uint64_t by the store
+}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e384m5x2e368m1/fenz.h b/src/Specific/montgomery32_2e384m5x2e368m1/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m5x2e368m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the definition in fenz.c of this directory: twelve input words, result written through out (out[0] only).
diff --git a/src/Specific/montgomery32_2e384m79x2e376m1/feadd.c b/src/Specific/montgomery32_2e384m79x2e376m1/feadd.c
new file mode 100644
index 000000000..f52ce38ec
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m79x2e376m1/feadd.c
@@ -0,0 +1,70 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Adds the operand in the first twelve value parameters to the operand in the last twelve, trial-subtracts a fixed constant, and writes twelve selected words to out[0..11].
+{ uint32_t x49; uint8_t x50 = _addcarryx_u32(0x0, x5, x27, &x49); // start of the 32-bit carry chain (carry-in 0): x49 = sum word, x50 = carry out; the uint64_t arguments are implicitly narrowed to 32 bits by the call
+{ uint32_t x52; uint8_t x53 = _addcarryx_u32(x50, x7, x29, &x52); // each following add consumes the previous carry
+{ uint32_t x55; uint8_t x56 = _addcarryx_u32(x53, x9, x31, &x55);
+{ uint32_t x58; uint8_t x59 = _addcarryx_u32(x56, x11, x33, &x58);
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(x59, x13, x35, &x61);
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x15, x37, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x17, x39, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x19, x41, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x21, x43, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x23, x45, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x25, x47, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x24, x46, &x82); // last word of the chain; x83 is the carry out of the whole 12-word sum
+{ uint32_t x85; uint8_t x86 = _subborrow_u32(0x0, x49, 0xffffffff, &x85); // start of the borrow chain subtracting a constant from the sum; its words, in chain order, are eleven 0xffffffff then 0xb0ffffff, i.e. 2^384 - 79*2^376 - 1 (matches the directory name)
+{ uint32_t x88; uint8_t x89 = _subborrow_u32(x86, x52, 0xffffffff, &x88);
+{ uint32_t x91; uint8_t x92 = _subborrow_u32(x89, x55, 0xffffffff, &x91);
+{ uint32_t x94; uint8_t x95 = _subborrow_u32(x92, x58, 0xffffffff, &x94);
+{ uint32_t x97; uint8_t x98 = _subborrow_u32(x95, x61, 0xffffffff, &x97);
+{ uint32_t x100; uint8_t x101 = _subborrow_u32(x98, x64, 0xffffffff, &x100);
+{ uint32_t x103; uint8_t x104 = _subborrow_u32(x101, x67, 0xffffffff, &x103);
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(x104, x70, 0xffffffff, &x106);
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x73, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x76, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x79, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x82, 0xb0ffffff, &x118); // the only constant word that differs from 0xffffffff
+{ uint32_t _; uint8_t x122 = _subborrow_u32(x119, x83, 0x0, &_); // extends the borrow through the addition's carry x83; the difference word '_' is never read, only the final borrow x122 is used
+{ uint32_t x123 = cmovznz(x122, x118, x82); // cmovznz is declared in liblow.h (not shown); presumably returns its third argument (the plain sum word) when x122 is non-zero and its second (the subtracted word) otherwise - confirm
+{ uint32_t x124 = cmovznz(x122, x115, x79);
+{ uint32_t x125 = cmovznz(x122, x112, x76);
+{ uint32_t x126 = cmovznz(x122, x109, x73);
+{ uint32_t x127 = cmovznz(x122, x106, x70);
+{ uint32_t x128 = cmovznz(x122, x103, x67);
+{ uint32_t x129 = cmovznz(x122, x100, x64);
+{ uint32_t x130 = cmovznz(x122, x97, x61);
+{ uint32_t x131 = cmovznz(x122, x94, x58);
+{ uint32_t x132 = cmovznz(x122, x91, x55);
+{ uint32_t x133 = cmovznz(x122, x88, x52);
+{ uint32_t x134 = cmovznz(x122, x85, x49);
+out[0] = x123; // out[0] receives the word from the last link of the carry chain
+out[1] = x124;
+out[2] = x125;
+out[3] = x126;
+out[4] = x127;
+out[5] = x128;
+out[6] = x129;
+out[7] = x130;
+out[8] = x131;
+out[9] = x132;
+out[10] = x133;
+out[11] = x134; // out[11] receives the word from the first link of the carry chain
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/montgomery32_2e384m79x2e376m1/feadd.h b/src/Specific/montgomery32_2e384m79x2e376m1/feadd.h
new file mode 100644
index 000000000..06344a9dc
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m79x2e376m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Prototype for the definition in feadd.c of this directory: two 12-word operands passed by value, 12 result words written through out.
diff --git a/src/Specific/montgomery32_2e384m79x2e376m1/fenz.c b/src/Specific/montgomery32_2e384m79x2e376m1/fenz.c
new file mode 100644
index 000000000..047ff14c3
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m79x2e376m1/fenz.c
@@ -0,0 +1,33 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: ORs the twelve input words together and stores the result in out[0].
+{ uint32_t x23 = (x22 | x21); // every OR result is stored in a uint32_t, so bits above bit 31 of the 64-bit inputs are dropped; presumably each input already fits in 32 bits - confirm against callers
+{ uint32_t x24 = (x20 | x23); // x24..x33 fold the remaining inputs into the running OR, one input per line
+{ uint32_t x25 = (x18 | x24);
+{ uint32_t x26 = (x16 | x25);
+{ uint32_t x27 = (x14 | x26);
+{ uint32_t x28 = (x12 | x27);
+{ uint32_t x29 = (x10 | x28);
+{ uint32_t x30 = (x8 | x29);
+{ uint32_t x31 = (x6 | x30);
+{ uint32_t x32 = (x4 | x31);
+{ uint32_t x33 = (x2 | x32);
+out[0] = x33; // zero exactly when the low 32 bits of every input are zero; widened to uint64_t by the store
+}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e384m79x2e376m1/fenz.h b/src/Specific/montgomery32_2e384m79x2e376m1/fenz.h
new file mode 100644
index 000000000..8d17e783b
--- /dev/null
+++ b/src/Specific/montgomery32_2e384m79x2e376m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the definition in fenz.c of this directory: twelve input words, result written through out (out[0] only).
diff --git a/src/Specific/montgomery32_2e389m21/feadd.c b/src/Specific/montgomery32_2e389m21/feadd.c
new file mode 100644
index 000000000..d4266a546
--- /dev/null
+++ b/src/Specific/montgomery32_2e389m21/feadd.c
@@ -0,0 +1,74 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29) // Adds the operand in the first thirteen value parameters to the operand in the last thirteen, trial-subtracts a fixed constant, and writes thirteen selected words to out[0..12].
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53); // start of the 32-bit carry chain (carry-in 0): x53 = sum word, x54 = carry out; the uint64_t arguments are implicitly narrowed to 32 bits by the call
+{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56); // each following add consumes the previous carry
+{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89); // last word of the chain; x90 is the carry out of the whole 13-word sum
+{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffeb, &x92); // start of the borrow chain subtracting a constant from the sum; its words, in chain order, are 0xffffffeb, eleven 0xffffffff, then 0x1f, i.e. 2^389 - 21 (matches the directory name)
+{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1f, &x128); // final constant word is only 5 bits wide (389 = 12*32 + 5)
+{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_); // extends the borrow through the addition's carry x90; the difference word '_' is never read, only the final borrow x132 is used
+{ uint32_t x133 = cmovznz(x132, x128, x89); // cmovznz is declared in liblow.h (not shown); presumably returns its third argument (the plain sum word) when x132 is non-zero and its second (the subtracted word) otherwise - confirm
+{ uint32_t x134 = cmovznz(x132, x125, x86);
+{ uint32_t x135 = cmovznz(x132, x122, x83);
+{ uint32_t x136 = cmovznz(x132, x119, x80);
+{ uint32_t x137 = cmovznz(x132, x116, x77);
+{ uint32_t x138 = cmovznz(x132, x113, x74);
+{ uint32_t x139 = cmovznz(x132, x110, x71);
+{ uint32_t x140 = cmovznz(x132, x107, x68);
+{ uint32_t x141 = cmovznz(x132, x104, x65);
+{ uint32_t x142 = cmovznz(x132, x101, x62);
+{ uint32_t x143 = cmovznz(x132, x98, x59);
+{ uint32_t x144 = cmovznz(x132, x95, x56);
+{ uint32_t x145 = cmovznz(x132, x92, x53);
+out[0] = x133; // out[0] receives the word from the last link of the carry chain
+out[1] = x134;
+out[2] = x135;
+out[3] = x136;
+out[4] = x137;
+out[5] = x138;
+out[6] = x139;
+out[7] = x140;
+out[8] = x141;
+out[9] = x142;
+out[10] = x143;
+out[11] = x144;
+out[12] = x145; // out[12] receives the word from the first link of the carry chain
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[13];
diff --git a/src/Specific/montgomery32_2e389m21/feadd.h b/src/Specific/montgomery32_2e389m21/feadd.h
new file mode 100644
index 000000000..b10483f1f
--- /dev/null
+++ b/src/Specific/montgomery32_2e389m21/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29); // Prototype for the definition in feadd.c of this directory: two 13-word operands passed by value, 13 result words written through out.
diff --git a/src/Specific/montgomery32_2e389m21/fenz.c b/src/Specific/montgomery32_2e389m21/fenz.c
new file mode 100644
index 000000000..521513c0d
--- /dev/null
+++ b/src/Specific/montgomery32_2e389m21/fenz.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: ORs the thirteen input words together and stores the result in out[0].
+{ uint32_t x25 = (x24 | x23); // every OR result is stored in a uint32_t, so bits above bit 31 of the 64-bit inputs are dropped; presumably each input already fits in 32 bits - confirm against callers
+{ uint32_t x26 = (x22 | x25); // x26..x36 fold the remaining inputs into the running OR, one input per line
+{ uint32_t x27 = (x20 | x26);
+{ uint32_t x28 = (x18 | x27);
+{ uint32_t x29 = (x16 | x28);
+{ uint32_t x30 = (x14 | x29);
+{ uint32_t x31 = (x12 | x30);
+{ uint32_t x32 = (x10 | x31);
+{ uint32_t x33 = (x8 | x32);
+{ uint32_t x34 = (x6 | x33);
+{ uint32_t x35 = (x4 | x34);
+{ uint32_t x36 = (x2 | x35);
+out[0] = x36; // zero exactly when the low 32 bits of every input are zero; widened to uint64_t by the store
+}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e389m21/fenz.h b/src/Specific/montgomery32_2e389m21/fenz.h
new file mode 100644
index 000000000..a5976e506
--- /dev/null
+++ b/src/Specific/montgomery32_2e389m21/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the definition in fenz.c of this directory: thirteen input words, result written through out (out[0] only).
diff --git a/src/Specific/montgomery32_2e401m31/feadd.c b/src/Specific/montgomery32_2e401m31/feadd.c
new file mode 100644
index 000000000..cedd0ac77
--- /dev/null
+++ b/src/Specific/montgomery32_2e401m31/feadd.c
@@ -0,0 +1,74 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29) // Adds the operand in the first thirteen value parameters to the operand in the last thirteen, trial-subtracts a fixed constant, and writes thirteen selected words to out[0..12].
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53); // start of the 32-bit carry chain (carry-in 0): x53 = sum word, x54 = carry out; the uint64_t arguments are implicitly narrowed to 32 bits by the call
+{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56); // each following add consumes the previous carry
+{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89); // last word of the chain; x90 is the carry out of the whole 13-word sum
+{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffe1, &x92); // start of the borrow chain subtracting a constant from the sum; its words, in chain order, are 0xffffffe1, eleven 0xffffffff, then 0x1ffff, i.e. 2^401 - 31 (matches the directory name)
+{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1ffff, &x128); // final constant word is only 17 bits wide (401 = 12*32 + 17)
+{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_); // extends the borrow through the addition's carry x90; the difference word '_' is never read, only the final borrow x132 is used
+{ uint32_t x133 = cmovznz(x132, x128, x89); // cmovznz is declared in liblow.h (not shown); presumably returns its third argument (the plain sum word) when x132 is non-zero and its second (the subtracted word) otherwise - confirm
+{ uint32_t x134 = cmovznz(x132, x125, x86);
+{ uint32_t x135 = cmovznz(x132, x122, x83);
+{ uint32_t x136 = cmovznz(x132, x119, x80);
+{ uint32_t x137 = cmovznz(x132, x116, x77);
+{ uint32_t x138 = cmovznz(x132, x113, x74);
+{ uint32_t x139 = cmovznz(x132, x110, x71);
+{ uint32_t x140 = cmovznz(x132, x107, x68);
+{ uint32_t x141 = cmovznz(x132, x104, x65);
+{ uint32_t x142 = cmovznz(x132, x101, x62);
+{ uint32_t x143 = cmovznz(x132, x98, x59);
+{ uint32_t x144 = cmovznz(x132, x95, x56);
+{ uint32_t x145 = cmovznz(x132, x92, x53);
+out[0] = x133; // out[0] receives the word from the last link of the carry chain
+out[1] = x134;
+out[2] = x135;
+out[3] = x136;
+out[4] = x137;
+out[5] = x138;
+out[6] = x139;
+out[7] = x140;
+out[8] = x141;
+out[9] = x142;
+out[10] = x143;
+out[11] = x144;
+out[12] = x145; // out[12] receives the word from the first link of the carry chain
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[13];
diff --git a/src/Specific/montgomery32_2e401m31/feadd.h b/src/Specific/montgomery32_2e401m31/feadd.h
new file mode 100644
index 000000000..b10483f1f
--- /dev/null
+++ b/src/Specific/montgomery32_2e401m31/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29); // Prototype for the definition in feadd.c of this directory: two 13-word operands passed by value, 13 result words written through out.
diff --git a/src/Specific/montgomery32_2e401m31/fenz.c b/src/Specific/montgomery32_2e401m31/fenz.c
new file mode 100644
index 000000000..521513c0d
--- /dev/null
+++ b/src/Specific/montgomery32_2e401m31/fenz.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: ORs the thirteen input words together and stores the result in out[0].
+{ uint32_t x25 = (x24 | x23); // every OR result is stored in a uint32_t, so bits above bit 31 of the 64-bit inputs are dropped; presumably each input already fits in 32 bits - confirm against callers
+{ uint32_t x26 = (x22 | x25); // x26..x36 fold the remaining inputs into the running OR, one input per line
+{ uint32_t x27 = (x20 | x26);
+{ uint32_t x28 = (x18 | x27);
+{ uint32_t x29 = (x16 | x28);
+{ uint32_t x30 = (x14 | x29);
+{ uint32_t x31 = (x12 | x30);
+{ uint32_t x32 = (x10 | x31);
+{ uint32_t x33 = (x8 | x32);
+{ uint32_t x34 = (x6 | x33);
+{ uint32_t x35 = (x4 | x34);
+{ uint32_t x36 = (x2 | x35);
+out[0] = x36; // zero exactly when the low 32 bits of every input are zero; widened to uint64_t by the store
+}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e401m31/fenz.h b/src/Specific/montgomery32_2e401m31/fenz.h
new file mode 100644
index 000000000..a5976e506
--- /dev/null
+++ b/src/Specific/montgomery32_2e401m31/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the definition in fenz.c of this directory: thirteen input words, result written through out (out[0] only).
diff --git a/src/Specific/montgomery32_2e413m21/feadd.c b/src/Specific/montgomery32_2e413m21/feadd.c
new file mode 100644
index 000000000..9d4ce928b
--- /dev/null
+++ b/src/Specific/montgomery32_2e413m21/feadd.c
@@ -0,0 +1,74 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29) // feadd: 13-limb add with carry of (x26..x5) and (x50..x29), followed by a conditional subtraction of the constant 2^413 - 21; each operand is passed highest limb first (x26 / x50) and lowest limb last (x5 / x29).
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53); // lowest limbs, carry-in 0; NOTE(review): the 32-bit intrinsic narrows the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
+{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89); // highest limbs; x90 is the carry out of the whole 13-limb sum
+{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffeb, &x92); // trial subtraction of the constant, lowest limb first: 0xffffffeb = 2^32 - 21
+{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x1fffffff, &x128); // highest limb of the constant: 0x1fffffff = 2^29 - 1
+{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_); // extends the borrow chain through the carry x90; the difference is discarded, only the final borrow x132 is used
+{ uint32_t x133 = cmovznz(x132, x128, x89); // per limb, x132 chooses between the subtracted value and the plain sum; presumably cmovznz(c, z, nz) returns z when c == 0 and nz otherwise -- confirm in liblow.h
+{ uint32_t x134 = cmovznz(x132, x125, x86);
+{ uint32_t x135 = cmovznz(x132, x122, x83);
+{ uint32_t x136 = cmovznz(x132, x119, x80);
+{ uint32_t x137 = cmovznz(x132, x116, x77);
+{ uint32_t x138 = cmovznz(x132, x113, x74);
+{ uint32_t x139 = cmovznz(x132, x110, x71);
+{ uint32_t x140 = cmovznz(x132, x107, x68);
+{ uint32_t x141 = cmovznz(x132, x104, x65);
+{ uint32_t x142 = cmovznz(x132, x101, x62);
+{ uint32_t x143 = cmovznz(x132, x98, x59);
+{ uint32_t x144 = cmovznz(x132, x95, x56);
+{ uint32_t x145 = cmovznz(x132, x92, x53);
+out[0] = x133; // results are stored highest limb first ...
+out[1] = x134;
+out[2] = x135;
+out[3] = x136;
+out[4] = x137;
+out[5] = x138;
+out[6] = x139;
+out[7] = x140;
+out[8] = x141;
+out[9] = x142;
+out[10] = x143;
+out[11] = x144;
+out[12] = x145; // ... down to the lowest limb in out[12]
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[13];  (all 13 entries are written)
diff --git a/src/Specific/montgomery32_2e413m21/feadd.h b/src/Specific/montgomery32_2e413m21/feadd.h
new file mode 100644
index 000000000..b10483f1f
--- /dev/null
+++ b/src/Specific/montgomery32_2e413m21/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29); // feadd: adds the two 13-limb operands (x26..x5 and x50..x29) and writes 13 result limbs to out; defined in feadd.c of this directory
diff --git a/src/Specific/montgomery32_2e413m21/fenz.c b/src/Specific/montgomery32_2e413m21/fenz.c
new file mode 100644
index 000000000..521513c0d
--- /dev/null
+++ b/src/Specific/montgomery32_2e413m21/fenz.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: ORs the 13 input words together and stores the result in out[0]; out[0] is nonzero iff some input has a bit set in its low 32 bits.
+{ uint32_t x25 = (x24 | x23); // NOTE(review): each uint32_t store drops bits 32..63 of the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x26 = (x22 | x25);
+{ uint32_t x27 = (x20 | x26);
+{ uint32_t x28 = (x18 | x27);
+{ uint32_t x29 = (x16 | x28);
+{ uint32_t x30 = (x14 | x29);
+{ uint32_t x31 = (x12 | x30);
+{ uint32_t x32 = (x10 | x31);
+{ uint32_t x33 = (x8 | x32);
+{ uint32_t x34 = (x6 | x33);
+{ uint32_t x35 = (x4 | x34);
+{ uint32_t x36 = (x2 | x35); // x36 = OR of all 13 inputs, truncated to 32 bits
+out[0] = x36; // zero-extended into the caller's uint64_t slot
+}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[1];  (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e413m21/fenz.h b/src/Specific/montgomery32_2e413m21/fenz.h
new file mode 100644
index 000000000..a5976e506
--- /dev/null
+++ b/src/Specific/montgomery32_2e413m21/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz: ORs its 13 input words into out[0]; defined in fenz.c of this directory
diff --git a/src/Specific/montgomery32_2e414m17/feadd.c b/src/Specific/montgomery32_2e414m17/feadd.c
new file mode 100644
index 000000000..f079eef7b
--- /dev/null
+++ b/src/Specific/montgomery32_2e414m17/feadd.c
@@ -0,0 +1,74 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29) // feadd: 13-limb add with carry of (x26..x5) and (x50..x29), followed by a conditional subtraction of the constant 2^414 - 17; each operand is passed highest limb first (x26 / x50) and lowest limb last (x5 / x29).
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53); // lowest limbs, carry-in 0; NOTE(review): the 32-bit intrinsic narrows the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
+{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89); // highest limbs; x90 is the carry out of the whole 13-limb sum
+{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffef, &x92); // trial subtraction of the constant, lowest limb first: 0xffffffef = 2^32 - 17
+{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xffffffff, &x110);
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0x3fffffff, &x128); // highest limb of the constant: 0x3fffffff = 2^30 - 1
+{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_); // extends the borrow chain through the carry x90; the difference is discarded, only the final borrow x132 is used
+{ uint32_t x133 = cmovznz(x132, x128, x89); // per limb, x132 chooses between the subtracted value and the plain sum; presumably cmovznz(c, z, nz) returns z when c == 0 and nz otherwise -- confirm in liblow.h
+{ uint32_t x134 = cmovznz(x132, x125, x86);
+{ uint32_t x135 = cmovznz(x132, x122, x83);
+{ uint32_t x136 = cmovznz(x132, x119, x80);
+{ uint32_t x137 = cmovznz(x132, x116, x77);
+{ uint32_t x138 = cmovznz(x132, x113, x74);
+{ uint32_t x139 = cmovznz(x132, x110, x71);
+{ uint32_t x140 = cmovznz(x132, x107, x68);
+{ uint32_t x141 = cmovznz(x132, x104, x65);
+{ uint32_t x142 = cmovznz(x132, x101, x62);
+{ uint32_t x143 = cmovznz(x132, x98, x59);
+{ uint32_t x144 = cmovznz(x132, x95, x56);
+{ uint32_t x145 = cmovznz(x132, x92, x53);
+out[0] = x133; // results are stored highest limb first ...
+out[1] = x134;
+out[2] = x135;
+out[3] = x136;
+out[4] = x137;
+out[5] = x138;
+out[6] = x139;
+out[7] = x140;
+out[8] = x141;
+out[9] = x142;
+out[10] = x143;
+out[11] = x144;
+out[12] = x145; // ... down to the lowest limb in out[12]
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[13];  (all 13 entries are written)
diff --git a/src/Specific/montgomery32_2e414m17/feadd.h b/src/Specific/montgomery32_2e414m17/feadd.h
new file mode 100644
index 000000000..b10483f1f
--- /dev/null
+++ b/src/Specific/montgomery32_2e414m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29); // feadd: adds the two 13-limb operands (x26..x5 and x50..x29) and writes 13 result limbs to out; defined in feadd.c of this directory
diff --git a/src/Specific/montgomery32_2e414m17/fenz.c b/src/Specific/montgomery32_2e414m17/fenz.c
new file mode 100644
index 000000000..521513c0d
--- /dev/null
+++ b/src/Specific/montgomery32_2e414m17/fenz.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: ORs the 13 input words together and stores the result in out[0]; out[0] is nonzero iff some input has a bit set in its low 32 bits.
+{ uint32_t x25 = (x24 | x23); // NOTE(review): each uint32_t store drops bits 32..63 of the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x26 = (x22 | x25);
+{ uint32_t x27 = (x20 | x26);
+{ uint32_t x28 = (x18 | x27);
+{ uint32_t x29 = (x16 | x28);
+{ uint32_t x30 = (x14 | x29);
+{ uint32_t x31 = (x12 | x30);
+{ uint32_t x32 = (x10 | x31);
+{ uint32_t x33 = (x8 | x32);
+{ uint32_t x34 = (x6 | x33);
+{ uint32_t x35 = (x4 | x34);
+{ uint32_t x36 = (x2 | x35); // x36 = OR of all 13 inputs, truncated to 32 bits
+out[0] = x36; // zero-extended into the caller's uint64_t slot
+}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[1];  (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e414m17/fenz.h b/src/Specific/montgomery32_2e414m17/fenz.h
new file mode 100644
index 000000000..a5976e506
--- /dev/null
+++ b/src/Specific/montgomery32_2e414m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz: ORs its 13 input words into out[0]; defined in fenz.c of this directory
diff --git a/src/Specific/montgomery32_2e416m2e208m1/feadd.c b/src/Specific/montgomery32_2e416m2e208m1/feadd.c
new file mode 100644
index 000000000..2e2850e48
--- /dev/null
+++ b/src/Specific/montgomery32_2e416m2e208m1/feadd.c
@@ -0,0 +1,74 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29) // feadd: 13-limb add with carry of (x26..x5) and (x50..x29), followed by a conditional subtraction of the constant 2^416 - 2^208 - 1; each operand is passed highest limb first (x26 / x50) and lowest limb last (x5 / x29).
+{ uint32_t x53; uint8_t x54 = _addcarryx_u32(0x0, x5, x29, &x53); // lowest limbs, carry-in 0; NOTE(review): the 32-bit intrinsic narrows the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x56; uint8_t x57 = _addcarryx_u32(x54, x7, x31, &x56);
+{ uint32_t x59; uint8_t x60 = _addcarryx_u32(x57, x9, x33, &x59);
+{ uint32_t x62; uint8_t x63 = _addcarryx_u32(x60, x11, x35, &x62);
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(x63, x13, x37, &x65);
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x15, x39, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x17, x41, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x19, x43, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x21, x45, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x23, x47, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x25, x49, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x27, x51, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x26, x50, &x89); // highest limbs; x90 is the carry out of the whole 13-limb sum
+{ uint32_t x92; uint8_t x93 = _subborrow_u32(0x0, x53, 0xffffffff, &x92); // trial subtraction of the constant, lowest limb first
+{ uint32_t x95; uint8_t x96 = _subborrow_u32(x93, x56, 0xffffffff, &x95);
+{ uint32_t x98; uint8_t x99 = _subborrow_u32(x96, x59, 0xffffffff, &x98);
+{ uint32_t x101; uint8_t x102 = _subborrow_u32(x99, x62, 0xffffffff, &x101);
+{ uint32_t x104; uint8_t x105 = _subborrow_u32(x102, x65, 0xffffffff, &x104);
+{ uint32_t x107; uint8_t x108 = _subborrow_u32(x105, x68, 0xffffffff, &x107);
+{ uint32_t x110; uint8_t x111 = _subborrow_u32(x108, x71, 0xfffeffff, &x110); // limb 6 (bits 192..223) has bit 16 clear: this is the 2^208 term of the constant
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(x111, x74, 0xffffffff, &x113);
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x77, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x80, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x83, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x86, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x89, 0xffffffff, &x128); // highest limb of the constant
+{ uint32_t _; uint8_t x132 = _subborrow_u32(x129, x90, 0x0, &_); // extends the borrow chain through the carry x90; the difference is discarded, only the final borrow x132 is used
+{ uint32_t x133 = cmovznz(x132, x128, x89); // per limb, x132 chooses between the subtracted value and the plain sum; presumably cmovznz(c, z, nz) returns z when c == 0 and nz otherwise -- confirm in liblow.h
+{ uint32_t x134 = cmovznz(x132, x125, x86);
+{ uint32_t x135 = cmovznz(x132, x122, x83);
+{ uint32_t x136 = cmovznz(x132, x119, x80);
+{ uint32_t x137 = cmovznz(x132, x116, x77);
+{ uint32_t x138 = cmovznz(x132, x113, x74);
+{ uint32_t x139 = cmovznz(x132, x110, x71);
+{ uint32_t x140 = cmovznz(x132, x107, x68);
+{ uint32_t x141 = cmovznz(x132, x104, x65);
+{ uint32_t x142 = cmovznz(x132, x101, x62);
+{ uint32_t x143 = cmovznz(x132, x98, x59);
+{ uint32_t x144 = cmovznz(x132, x95, x56);
+{ uint32_t x145 = cmovznz(x132, x92, x53);
+out[0] = x133; // results are stored highest limb first ...
+out[1] = x134;
+out[2] = x135;
+out[3] = x136;
+out[4] = x137;
+out[5] = x138;
+out[6] = x139;
+out[7] = x140;
+out[8] = x141;
+out[9] = x142;
+out[10] = x143;
+out[11] = x144;
+out[12] = x145; // ... down to the lowest limb in out[12]
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[13];  (all 13 entries are written)
diff --git a/src/Specific/montgomery32_2e416m2e208m1/feadd.h b/src/Specific/montgomery32_2e416m2e208m1/feadd.h
new file mode 100644
index 000000000..b10483f1f
--- /dev/null
+++ b/src/Specific/montgomery32_2e416m2e208m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline feadd(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29); // feadd: adds the two 13-limb operands (x26..x5 and x50..x29) and writes 13 result limbs to out; defined in feadd.c of this directory
diff --git a/src/Specific/montgomery32_2e416m2e208m1/fenz.c b/src/Specific/montgomery32_2e416m2e208m1/fenz.c
new file mode 100644
index 000000000..521513c0d
--- /dev/null
+++ b/src/Specific/montgomery32_2e416m2e208m1/fenz.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: ORs the 13 input words together and stores the result in out[0]; out[0] is nonzero iff some input has a bit set in its low 32 bits.
+{ uint32_t x25 = (x24 | x23); // NOTE(review): each uint32_t store drops bits 32..63 of the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x26 = (x22 | x25);
+{ uint32_t x27 = (x20 | x26);
+{ uint32_t x28 = (x18 | x27);
+{ uint32_t x29 = (x16 | x28);
+{ uint32_t x30 = (x14 | x29);
+{ uint32_t x31 = (x12 | x30);
+{ uint32_t x32 = (x10 | x31);
+{ uint32_t x33 = (x8 | x32);
+{ uint32_t x34 = (x6 | x33);
+{ uint32_t x35 = (x4 | x34);
+{ uint32_t x36 = (x2 | x35); // x36 = OR of all 13 inputs, truncated to 32 bits
+out[0] = x36; // zero-extended into the caller's uint64_t slot
+}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[1];  (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e416m2e208m1/fenz.h b/src/Specific/montgomery32_2e416m2e208m1/fenz.h
new file mode 100644
index 000000000..a5976e506
--- /dev/null
+++ b/src/Specific/montgomery32_2e416m2e208m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline fenz(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz: ORs its 13 input words into out[0]; defined in fenz.c of this directory
diff --git a/src/Specific/montgomery32_2e444m17/feadd.c b/src/Specific/montgomery32_2e444m17/feadd.c
new file mode 100644
index 000000000..f87eed2d7
--- /dev/null
+++ b/src/Specific/montgomery32_2e444m17/feadd.c
@@ -0,0 +1,78 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline feadd(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31) // feadd: 14-limb add with carry of (x28..x5) and (x54..x31), followed by a conditional subtraction of the constant 2^444 - 17; each operand is passed highest limb first (x28 / x54) and lowest limb last (x5 / x31).
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(0x0, x5, x31, &x57); // lowest limbs, carry-in 0; NOTE(review): the 32-bit intrinsic narrows the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x7, x33, &x60);
+{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x9, x35, &x63);
+{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x11, x37, &x66);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x13, x39, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x15, x41, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x17, x43, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x19, x45, &x78);
+{ uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x21, x47, &x81);
+{ uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x23, x49, &x84);
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x25, x51, &x87);
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x27, x53, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x29, x55, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x28, x54, &x96); // highest limbs; x97 is the carry out of the whole 14-limb sum
+{ uint32_t x99; uint8_t x100 = _subborrow_u32(0x0, x57, 0xffffffef, &x99); // trial subtraction of the constant, lowest limb first: 0xffffffef = 2^32 - 17
+{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x60, 0xffffffff, &x102);
+{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x63, 0xffffffff, &x105);
+{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x66, 0xffffffff, &x108);
+{ uint32_t x111; uint8_t x112 = _subborrow_u32(x109, x69, 0xffffffff, &x111);
+{ uint32_t x114; uint8_t x115 = _subborrow_u32(x112, x72, 0xffffffff, &x114);
+{ uint32_t x117; uint8_t x118 = _subborrow_u32(x115, x75, 0xffffffff, &x117);
+{ uint32_t x120; uint8_t x121 = _subborrow_u32(x118, x78, 0xffffffff, &x120);
+{ uint32_t x123; uint8_t x124 = _subborrow_u32(x121, x81, 0xffffffff, &x123);
+{ uint32_t x126; uint8_t x127 = _subborrow_u32(x124, x84, 0xffffffff, &x126);
+{ uint32_t x129; uint8_t x130 = _subborrow_u32(x127, x87, 0xffffffff, &x129);
+{ uint32_t x132; uint8_t x133 = _subborrow_u32(x130, x90, 0xffffffff, &x132);
+{ uint32_t x135; uint8_t x136 = _subborrow_u32(x133, x93, 0xffffffff, &x135);
+{ uint32_t x138; uint8_t x139 = _subborrow_u32(x136, x96, 0xfffffff, &x138); // highest limb of the constant: 0xfffffff = 2^28 - 1
+{ uint32_t _; uint8_t x142 = _subborrow_u32(x139, x97, 0x0, &_); // extends the borrow chain through the carry x97; the difference is discarded, only the final borrow x142 is used
+{ uint32_t x143 = cmovznz(x142, x138, x96); // per limb, x142 chooses between the subtracted value and the plain sum; presumably cmovznz(c, z, nz) returns z when c == 0 and nz otherwise -- confirm in liblow.h
+{ uint32_t x144 = cmovznz(x142, x135, x93);
+{ uint32_t x145 = cmovznz(x142, x132, x90);
+{ uint32_t x146 = cmovznz(x142, x129, x87);
+{ uint32_t x147 = cmovznz(x142, x126, x84);
+{ uint32_t x148 = cmovznz(x142, x123, x81);
+{ uint32_t x149 = cmovznz(x142, x120, x78);
+{ uint32_t x150 = cmovznz(x142, x117, x75);
+{ uint32_t x151 = cmovznz(x142, x114, x72);
+{ uint32_t x152 = cmovznz(x142, x111, x69);
+{ uint32_t x153 = cmovznz(x142, x108, x66);
+{ uint32_t x154 = cmovznz(x142, x105, x63);
+{ uint32_t x155 = cmovznz(x142, x102, x60);
+{ uint32_t x156 = cmovznz(x142, x99, x57);
+out[0] = x143; // results are stored highest limb first ...
+out[1] = x144;
+out[2] = x145;
+out[3] = x146;
+out[4] = x147;
+out[5] = x148;
+out[6] = x149;
+out[7] = x150;
+out[8] = x151;
+out[9] = x152;
+out[10] = x153;
+out[11] = x154;
+out[12] = x155;
+out[13] = x156; // ... down to the lowest limb in out[13]
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[14];  (all 14 entries are written)
diff --git a/src/Specific/montgomery32_2e444m17/feadd.h b/src/Specific/montgomery32_2e444m17/feadd.h
new file mode 100644
index 000000000..4acd7bb8f
--- /dev/null
+++ b/src/Specific/montgomery32_2e444m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline feadd(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31); // feadd: adds the two 14-limb operands (x28..x5 and x54..x31) and writes 14 result limbs to out; defined in feadd.c of this directory
diff --git a/src/Specific/montgomery32_2e444m17/fenz.c b/src/Specific/montgomery32_2e444m17/fenz.c
new file mode 100644
index 000000000..d631822fd
--- /dev/null
+++ b/src/Specific/montgomery32_2e444m17/fenz.c
@@ -0,0 +1,35 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline fenz(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: ORs the 14 input words together and stores the result in out[0]; out[0] is nonzero iff some input has a bit set in its low 32 bits.
+{ uint32_t x27 = (x26 | x25); // NOTE(review): each uint32_t store drops bits 32..63 of the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x28 = (x24 | x27);
+{ uint32_t x29 = (x22 | x28);
+{ uint32_t x30 = (x20 | x29);
+{ uint32_t x31 = (x18 | x30);
+{ uint32_t x32 = (x16 | x31);
+{ uint32_t x33 = (x14 | x32);
+{ uint32_t x34 = (x12 | x33);
+{ uint32_t x35 = (x10 | x34);
+{ uint32_t x36 = (x8 | x35);
+{ uint32_t x37 = (x6 | x36);
+{ uint32_t x38 = (x4 | x37);
+{ uint32_t x39 = (x2 | x38); // x39 = OR of all 14 inputs, truncated to 32 bits
+out[0] = x39; // zero-extended into the caller's uint64_t slot
+}}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[1];  (only out[0] is written)
diff --git a/src/Specific/montgomery32_2e444m17/fenz.h b/src/Specific/montgomery32_2e444m17/fenz.h
new file mode 100644
index 000000000..fa0c53e85
--- /dev/null
+++ b/src/Specific/montgomery32_2e444m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline fenz(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz: ORs its 14 input words into out[0]; defined in fenz.c of this directory
diff --git a/src/Specific/montgomery32_2e448m2e224m1/feadd.c b/src/Specific/montgomery32_2e448m2e224m1/feadd.c
new file mode 100644
index 000000000..5eb3e19d3
--- /dev/null
+++ b/src/Specific/montgomery32_2e448m2e224m1/feadd.c
@@ -0,0 +1,78 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type built with the mode(TI) attribute; not referenced by the code in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC only: Clang and the Intel compiler are excluded explicitly
+// GCC-specific workaround; background: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name straight to the builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same for the 64-bit variant (not used in this file)
+#endif
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the function defined below
+
+void force_inline feadd(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31) // feadd: 14-limb add with carry of (x28..x5) and (x54..x31), followed by a conditional subtraction of the constant 2^448 - 2^224 - 1; each operand is passed highest limb first (x28 / x54) and lowest limb last (x5 / x31).
+{ uint32_t x57; uint8_t x58 = _addcarryx_u32(0x0, x5, x31, &x57); // lowest limbs, carry-in 0; NOTE(review): the 32-bit intrinsic narrows the uint64_t arguments -- presumably callers pass 32-bit limbs; confirm.
+{ uint32_t x60; uint8_t x61 = _addcarryx_u32(x58, x7, x33, &x60);
+{ uint32_t x63; uint8_t x64 = _addcarryx_u32(x61, x9, x35, &x63);
+{ uint32_t x66; uint8_t x67 = _addcarryx_u32(x64, x11, x37, &x66);
+{ uint32_t x69; uint8_t x70 = _addcarryx_u32(x67, x13, x39, &x69);
+{ uint32_t x72; uint8_t x73 = _addcarryx_u32(x70, x15, x41, &x72);
+{ uint32_t x75; uint8_t x76 = _addcarryx_u32(x73, x17, x43, &x75);
+{ uint32_t x78; uint8_t x79 = _addcarryx_u32(x76, x19, x45, &x78);
+{ uint32_t x81; uint8_t x82 = _addcarryx_u32(x79, x21, x47, &x81);
+{ uint32_t x84; uint8_t x85 = _addcarryx_u32(x82, x23, x49, &x84);
+{ uint32_t x87; uint8_t x88 = _addcarryx_u32(x85, x25, x51, &x87);
+{ uint32_t x90; uint8_t x91 = _addcarryx_u32(x88, x27, x53, &x90);
+{ uint32_t x93; uint8_t x94 = _addcarryx_u32(x91, x29, x55, &x93);
+{ uint32_t x96; uint8_t x97 = _addcarryx_u32(x94, x28, x54, &x96); // highest limbs; x97 is the carry out of the whole 14-limb sum
+{ uint32_t x99; uint8_t x100 = _subborrow_u32(0x0, x57, 0xffffffff, &x99); // trial subtraction of the constant, lowest limb first
+{ uint32_t x102; uint8_t x103 = _subborrow_u32(x100, x60, 0xffffffff, &x102);
+{ uint32_t x105; uint8_t x106 = _subborrow_u32(x103, x63, 0xffffffff, &x105);
+{ uint32_t x108; uint8_t x109 = _subborrow_u32(x106, x66, 0xffffffff, &x108);
+{ uint32_t x111; uint8_t x112 = _subborrow_u32(x109, x69, 0xffffffff, &x111);
+{ uint32_t x114; uint8_t x115 = _subborrow_u32(x112, x72, 0xffffffff, &x114);
+{ uint32_t x117; uint8_t x118 = _subborrow_u32(x115, x75, 0xffffffff, &x117);
+{ uint32_t x120; uint8_t x121 = _subborrow_u32(x118, x78, 0xfffffffe, &x120); // limb 7 (bits 224..255) has bit 0 clear: this is the 2^224 term of the constant
+{ uint32_t x123; uint8_t x124 = _subborrow_u32(x121, x81, 0xffffffff, &x123);
+{ uint32_t x126; uint8_t x127 = _subborrow_u32(x124, x84, 0xffffffff, &x126);
+{ uint32_t x129; uint8_t x130 = _subborrow_u32(x127, x87, 0xffffffff, &x129);
+{ uint32_t x132; uint8_t x133 = _subborrow_u32(x130, x90, 0xffffffff, &x132);
+{ uint32_t x135; uint8_t x136 = _subborrow_u32(x133, x93, 0xffffffff, &x135);
+{ uint32_t x138; uint8_t x139 = _subborrow_u32(x136, x96, 0xffffffff, &x138); // highest limb of the constant
+{ uint32_t _; uint8_t x142 = _subborrow_u32(x139, x97, 0x0, &_); // extends the borrow chain through the carry x97; the difference is discarded, only the final borrow x142 is used
+{ uint32_t x143 = cmovznz(x142, x138, x96); // per limb, x142 chooses between the subtracted value and the plain sum; presumably cmovznz(c, z, nz) returns z when c == 0 and nz otherwise -- confirm in liblow.h
+{ uint32_t x144 = cmovznz(x142, x135, x93);
+{ uint32_t x145 = cmovznz(x142, x132, x90);
+{ uint32_t x146 = cmovznz(x142, x129, x87);
+{ uint32_t x147 = cmovznz(x142, x126, x84);
+{ uint32_t x148 = cmovznz(x142, x123, x81);
+{ uint32_t x149 = cmovznz(x142, x120, x78);
+{ uint32_t x150 = cmovznz(x142, x117, x75);
+{ uint32_t x151 = cmovznz(x142, x114, x72);
+{ uint32_t x152 = cmovznz(x142, x111, x69);
+{ uint32_t x153 = cmovznz(x142, x108, x66);
+{ uint32_t x154 = cmovznz(x142, x105, x63);
+{ uint32_t x155 = cmovznz(x142, x102, x60);
+{ uint32_t x156 = cmovznz(x142, x99, x57);
+out[0] = x143; // results are stored highest limb first ...
+out[1] = x144;
+out[2] = x145;
+out[3] = x146;
+out[4] = x147;
+out[5] = x148;
+out[6] = x149;
+out[7] = x150;
+out[8] = x151;
+out[9] = x152;
+out[10] = x153;
+out[11] = x154;
+out[12] = x155;
+out[13] = x156; // ... down to the lowest limb in out[13]
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and every nested block opened above
+// caller: uint64_t out[14];  (all 14 entries are written)
diff --git a/src/Specific/montgomery32_2e448m2e224m1/feadd.h b/src/Specific/montgomery32_2e448m2e224m1/feadd.h
new file mode 100644
index 000000000..4acd7bb8f
--- /dev/null
+++ b/src/Specific/montgomery32_2e448m2e224m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition before redefining it
+#define force_inline __attribute__((always_inline)) // always-inline attribute applied to the prototype below
+
+void force_inline feadd(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31); // feadd: adds the two 14-limb operands (x28..x5 and x54..x31) and writes 14 result limbs to out; defined in feadd.c of this directory
diff --git a/src/Specific/montgomery32_2e448m2e224m1/fenz.c b/src/Specific/montgomery32_2e448m2e224m1/fenz.c
new file mode 100644
index 000000000..d631822fd
--- /dev/null
+++ b/src/Specific/montgomery32_2e448m2e224m1/fenz.c
@@ -0,0 +1,35 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the 14 input words together and stores the single result in out[0]. NOTE(review): the name suggests a "field element nonzero" test — confirm against callers.
+{ uint32_t x27 = (x26 | x25); // the OR is evaluated on the uint64_t operands, then narrowed to uint32_t by the initializer, so bits 32..63 are dropped
+{ uint32_t x28 = (x24 | x27); // each following line folds one more input into the running 32-bit OR
+{ uint32_t x29 = (x22 | x28);
+{ uint32_t x30 = (x20 | x29);
+{ uint32_t x31 = (x18 | x30);
+{ uint32_t x32 = (x16 | x31);
+{ uint32_t x33 = (x14 | x32);
+{ uint32_t x34 = (x12 | x33);
+{ uint32_t x35 = (x10 | x34);
+{ uint32_t x36 = (x8 | x35);
+{ uint32_t x37 = (x6 | x36);
+{ uint32_t x38 = (x4 | x37);
+{ uint32_t x39 = (x2 | x38);
+out[0] = x39; // nonzero iff at least one input has a bit set in its low 32 bits; widened back to uint64_t on store
+}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e448m2e224m1/fenz.h b/src/Specific/montgomery32_2e448m2e224m1/fenz.h
new file mode 100644
index 000000000..fa0c53e85
--- /dev/null
+++ b/src/Specific/montgomery32_2e448m2e224m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e450m2e225m1/feadd.c b/src/Specific/montgomery32_2e450m2e225m1/feadd.c
new file mode 100644
index 000000000..3290f356a
--- /dev/null
+++ b/src/Specific/montgomery32_2e450m2e225m1/feadd.c
@@ -0,0 +1,82 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33) // Adds the 15-word operand (x5, x7, ..., x31, x30) to (x33, x35, ..., x59, x58) with carry, subtracts a 15-word constant with borrow, then uses cmovznz per word to pick between the raw sum and the difference; the 15 picks go to out[0..14]. Parameters are uint64_t but feed 32-bit intrinsics, so only their low 32 bits take part.
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61); // add chain starts with carry-in 0; x61 is the sum word, x62 the carry passed to the next line
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
+{ uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
+{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103); // x104 is the carry out of the last word of the addition
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffff, &x106); // subtract chain starts with borrow-in 0; the constants on these 15 lines are the 32-bit words, least significant first, of 2^450 - 2^225 - 1
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
+{ uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
+{ uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
+{ uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xfffffffd, &x127); // 0xfffffffd: the -2^225 term clears bit 1 of this word (225 = 7*32 + 1)
+{ uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
+{ uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
+{ uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
+{ uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
+{ uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
+{ uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
+{ uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0x3, &x148); // 0x3: the top word holds the remaining 450 - 14*32 = 2 bits
+{ uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_); // carries the borrow through the add-chain carry x104; the difference is discarded into `_`, only the final borrow x152 is kept
+{ uint32_t x153 = cmovznz(x152, x148, x103); // x152 drives every selection below. NOTE(review): cmovznz comes from liblow.h — presumably yields the subtracted word when x152 == 0 and the raw sum word otherwise; confirm there
+{ uint32_t x154 = cmovznz(x152, x145, x100);
+{ uint32_t x155 = cmovznz(x152, x142, x97);
+{ uint32_t x156 = cmovznz(x152, x139, x94);
+{ uint32_t x157 = cmovznz(x152, x136, x91);
+{ uint32_t x158 = cmovznz(x152, x133, x88);
+{ uint32_t x159 = cmovznz(x152, x130, x85);
+{ uint32_t x160 = cmovznz(x152, x127, x82);
+{ uint32_t x161 = cmovznz(x152, x124, x79);
+{ uint32_t x162 = cmovznz(x152, x121, x76);
+{ uint32_t x163 = cmovznz(x152, x118, x73);
+{ uint32_t x164 = cmovznz(x152, x115, x70);
+{ uint32_t x165 = cmovznz(x152, x112, x67);
+{ uint32_t x166 = cmovznz(x152, x109, x64);
+{ uint32_t x167 = cmovznz(x152, x106, x61);
+out[0] = x153; // out[0] takes the word handled last in both chains (x103/x148); out[14] takes the one handled first (x61/x106)
+out[1] = x154;
+out[2] = x155;
+out[3] = x156;
+out[4] = x157;
+out[5] = x158;
+out[6] = x159;
+out[7] = x160;
+out[8] = x161;
+out[9] = x162;
+out[10] = x163;
+out[11] = x164;
+out[12] = x165;
+out[13] = x166;
+out[14] = x167;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[15];
diff --git a/src/Specific/montgomery32_2e450m2e225m1/feadd.h b/src/Specific/montgomery32_2e450m2e225m1/feadd.h
new file mode 100644
index 000000000..8f380a44a
--- /dev/null
+++ b/src/Specific/montgomery32_2e450m2e225m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33);
diff --git a/src/Specific/montgomery32_2e450m2e225m1/fenz.c b/src/Specific/montgomery32_2e450m2e225m1/fenz.c
new file mode 100644
index 000000000..76004e346
--- /dev/null
+++ b/src/Specific/montgomery32_2e450m2e225m1/fenz.c
@@ -0,0 +1,36 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the 15 input words together and stores the single result in out[0]. NOTE(review): the name suggests a "field element nonzero" test — confirm against callers.
+{ uint32_t x29 = (x28 | x27); // the OR is evaluated on the uint64_t operands, then narrowed to uint32_t by the initializer, so bits 32..63 are dropped
+{ uint32_t x30 = (x26 | x29); // each following line folds one more input into the running 32-bit OR
+{ uint32_t x31 = (x24 | x30);
+{ uint32_t x32 = (x22 | x31);
+{ uint32_t x33 = (x20 | x32);
+{ uint32_t x34 = (x18 | x33);
+{ uint32_t x35 = (x16 | x34);
+{ uint32_t x36 = (x14 | x35);
+{ uint32_t x37 = (x12 | x36);
+{ uint32_t x38 = (x10 | x37);
+{ uint32_t x39 = (x8 | x38);
+{ uint32_t x40 = (x6 | x39);
+{ uint32_t x41 = (x4 | x40);
+{ uint32_t x42 = (x2 | x41);
+out[0] = x42; // nonzero iff at least one input has a bit set in its low 32 bits; widened back to uint64_t on store
+}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e450m2e225m1/fenz.h b/src/Specific/montgomery32_2e450m2e225m1/fenz.h
new file mode 100644
index 000000000..e0ceb4efe
--- /dev/null
+++ b/src/Specific/montgomery32_2e450m2e225m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e452m3/feadd.c b/src/Specific/montgomery32_2e452m3/feadd.c
new file mode 100644
index 000000000..b156fe974
--- /dev/null
+++ b/src/Specific/montgomery32_2e452m3/feadd.c
@@ -0,0 +1,82 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33) // Adds the 15-word operand (x5, x7, ..., x31, x30) to (x33, x35, ..., x59, x58) with carry, subtracts a 15-word constant with borrow, then uses cmovznz per word to pick between the raw sum and the difference; the 15 picks go to out[0..14]. Parameters are uint64_t but feed 32-bit intrinsics, so only their low 32 bits take part.
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61); // add chain starts with carry-in 0; x61 is the sum word, x62 the carry passed to the next line
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
+{ uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
+{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103); // x104 is the carry out of the last word of the addition
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xfffffffd, &x106); // subtract chain starts with borrow-in 0; the constants on these 15 lines are the 32-bit words, least significant first, of 2^452 - 3 (lowest word is 2^32 - 3)
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
+{ uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
+{ uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
+{ uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xffffffff, &x127);
+{ uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
+{ uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
+{ uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
+{ uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
+{ uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
+{ uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
+{ uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xf, &x148); // 0xf: the top word holds the remaining 452 - 14*32 = 4 bits
+{ uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_); // carries the borrow through the add-chain carry x104; the difference is discarded into `_`, only the final borrow x152 is kept
+{ uint32_t x153 = cmovznz(x152, x148, x103); // x152 drives every selection below. NOTE(review): cmovznz comes from liblow.h — presumably yields the subtracted word when x152 == 0 and the raw sum word otherwise; confirm there
+{ uint32_t x154 = cmovznz(x152, x145, x100);
+{ uint32_t x155 = cmovznz(x152, x142, x97);
+{ uint32_t x156 = cmovznz(x152, x139, x94);
+{ uint32_t x157 = cmovznz(x152, x136, x91);
+{ uint32_t x158 = cmovznz(x152, x133, x88);
+{ uint32_t x159 = cmovznz(x152, x130, x85);
+{ uint32_t x160 = cmovznz(x152, x127, x82);
+{ uint32_t x161 = cmovznz(x152, x124, x79);
+{ uint32_t x162 = cmovznz(x152, x121, x76);
+{ uint32_t x163 = cmovznz(x152, x118, x73);
+{ uint32_t x164 = cmovznz(x152, x115, x70);
+{ uint32_t x165 = cmovznz(x152, x112, x67);
+{ uint32_t x166 = cmovznz(x152, x109, x64);
+{ uint32_t x167 = cmovznz(x152, x106, x61);
+out[0] = x153; // out[0] takes the word handled last in both chains (x103/x148); out[14] takes the one handled first (x61/x106)
+out[1] = x154;
+out[2] = x155;
+out[3] = x156;
+out[4] = x157;
+out[5] = x158;
+out[6] = x159;
+out[7] = x160;
+out[8] = x161;
+out[9] = x162;
+out[10] = x163;
+out[11] = x164;
+out[12] = x165;
+out[13] = x166;
+out[14] = x167;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[15];
diff --git a/src/Specific/montgomery32_2e452m3/feadd.h b/src/Specific/montgomery32_2e452m3/feadd.h
new file mode 100644
index 000000000..8f380a44a
--- /dev/null
+++ b/src/Specific/montgomery32_2e452m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33);
diff --git a/src/Specific/montgomery32_2e452m3/fenz.c b/src/Specific/montgomery32_2e452m3/fenz.c
new file mode 100644
index 000000000..76004e346
--- /dev/null
+++ b/src/Specific/montgomery32_2e452m3/fenz.c
@@ -0,0 +1,36 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the 15 input words together and stores the single result in out[0]. NOTE(review): the name suggests a "field element nonzero" test — confirm against callers.
+{ uint32_t x29 = (x28 | x27); // the OR is evaluated on the uint64_t operands, then narrowed to uint32_t by the initializer, so bits 32..63 are dropped
+{ uint32_t x30 = (x26 | x29); // each following line folds one more input into the running 32-bit OR
+{ uint32_t x31 = (x24 | x30);
+{ uint32_t x32 = (x22 | x31);
+{ uint32_t x33 = (x20 | x32);
+{ uint32_t x34 = (x18 | x33);
+{ uint32_t x35 = (x16 | x34);
+{ uint32_t x36 = (x14 | x35);
+{ uint32_t x37 = (x12 | x36);
+{ uint32_t x38 = (x10 | x37);
+{ uint32_t x39 = (x8 | x38);
+{ uint32_t x40 = (x6 | x39);
+{ uint32_t x41 = (x4 | x40);
+{ uint32_t x42 = (x2 | x41);
+out[0] = x42; // nonzero iff at least one input has a bit set in its low 32 bits; widened back to uint64_t on store
+}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e452m3/fenz.h b/src/Specific/montgomery32_2e452m3/fenz.h
new file mode 100644
index 000000000..e0ceb4efe
--- /dev/null
+++ b/src/Specific/montgomery32_2e452m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e468m17/feadd.c b/src/Specific/montgomery32_2e468m17/feadd.c
new file mode 100644
index 000000000..2279360ae
--- /dev/null
+++ b/src/Specific/montgomery32_2e468m17/feadd.c
@@ -0,0 +1,82 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33) // Adds the 15-word operand (x5, x7, ..., x31, x30) to (x33, x35, ..., x59, x58) with carry, subtracts a 15-word constant with borrow, then uses cmovznz per word to pick between the raw sum and the difference; the 15 picks go to out[0..14]. Parameters are uint64_t but feed 32-bit intrinsics, so only their low 32 bits take part.
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61); // add chain starts with carry-in 0; x61 is the sum word, x62 the carry passed to the next line
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
+{ uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
+{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103); // x104 is the carry out of the last word of the addition
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffef, &x106); // subtract chain starts with borrow-in 0; the constants on these 15 lines are the 32-bit words, least significant first, of 2^468 - 17 (lowest word is 2^32 - 17)
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
+{ uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
+{ uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
+{ uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xffffffff, &x127);
+{ uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
+{ uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
+{ uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
+{ uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
+{ uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
+{ uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
+{ uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xfffff, &x148); // 0xfffff: the top word holds the remaining 468 - 14*32 = 20 bits
+{ uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_); // carries the borrow through the add-chain carry x104; the difference is discarded into `_`, only the final borrow x152 is kept
+{ uint32_t x153 = cmovznz(x152, x148, x103); // x152 drives every selection below. NOTE(review): cmovznz comes from liblow.h — presumably yields the subtracted word when x152 == 0 and the raw sum word otherwise; confirm there
+{ uint32_t x154 = cmovznz(x152, x145, x100);
+{ uint32_t x155 = cmovznz(x152, x142, x97);
+{ uint32_t x156 = cmovznz(x152, x139, x94);
+{ uint32_t x157 = cmovznz(x152, x136, x91);
+{ uint32_t x158 = cmovznz(x152, x133, x88);
+{ uint32_t x159 = cmovznz(x152, x130, x85);
+{ uint32_t x160 = cmovznz(x152, x127, x82);
+{ uint32_t x161 = cmovznz(x152, x124, x79);
+{ uint32_t x162 = cmovznz(x152, x121, x76);
+{ uint32_t x163 = cmovznz(x152, x118, x73);
+{ uint32_t x164 = cmovznz(x152, x115, x70);
+{ uint32_t x165 = cmovznz(x152, x112, x67);
+{ uint32_t x166 = cmovznz(x152, x109, x64);
+{ uint32_t x167 = cmovznz(x152, x106, x61);
+out[0] = x153; // out[0] takes the word handled last in both chains (x103/x148); out[14] takes the one handled first (x61/x106)
+out[1] = x154;
+out[2] = x155;
+out[3] = x156;
+out[4] = x157;
+out[5] = x158;
+out[6] = x159;
+out[7] = x160;
+out[8] = x161;
+out[9] = x162;
+out[10] = x163;
+out[11] = x164;
+out[12] = x165;
+out[13] = x166;
+out[14] = x167;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[15];
diff --git a/src/Specific/montgomery32_2e468m17/feadd.h b/src/Specific/montgomery32_2e468m17/feadd.h
new file mode 100644
index 000000000..8f380a44a
--- /dev/null
+++ b/src/Specific/montgomery32_2e468m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33);
diff --git a/src/Specific/montgomery32_2e468m17/fenz.c b/src/Specific/montgomery32_2e468m17/fenz.c
new file mode 100644
index 000000000..76004e346
--- /dev/null
+++ b/src/Specific/montgomery32_2e468m17/fenz.c
@@ -0,0 +1,36 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the 15 input words together and stores the single result in out[0]. NOTE(review): the name suggests a "field element nonzero" test — confirm against callers.
+{ uint32_t x29 = (x28 | x27); // the OR is evaluated on the uint64_t operands, then narrowed to uint32_t by the initializer, so bits 32..63 are dropped
+{ uint32_t x30 = (x26 | x29); // each following line folds one more input into the running 32-bit OR
+{ uint32_t x31 = (x24 | x30);
+{ uint32_t x32 = (x22 | x31);
+{ uint32_t x33 = (x20 | x32);
+{ uint32_t x34 = (x18 | x33);
+{ uint32_t x35 = (x16 | x34);
+{ uint32_t x36 = (x14 | x35);
+{ uint32_t x37 = (x12 | x36);
+{ uint32_t x38 = (x10 | x37);
+{ uint32_t x39 = (x8 | x38);
+{ uint32_t x40 = (x6 | x39);
+{ uint32_t x41 = (x4 | x40);
+{ uint32_t x42 = (x2 | x41);
+out[0] = x42; // nonzero iff at least one input has a bit set in its low 32 bits; widened back to uint64_t on store
+}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e468m17/fenz.h b/src/Specific/montgomery32_2e468m17/fenz.h
new file mode 100644
index 000000000..e0ceb4efe
--- /dev/null
+++ b/src/Specific/montgomery32_2e468m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e480m2e240m1/feadd.c b/src/Specific/montgomery32_2e480m2e240m1/feadd.c
new file mode 100644
index 000000000..e3a7b717d
--- /dev/null
+++ b/src/Specific/montgomery32_2e480m2e240m1/feadd.c
@@ -0,0 +1,82 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33) // Adds the 15-word operand (x5, x7, ..., x31, x30) to (x33, x35, ..., x59, x58) with carry, subtracts a 15-word constant with borrow, then uses cmovznz per word to pick between the raw sum and the difference; the 15 picks go to out[0..14]. Parameters are uint64_t but feed 32-bit intrinsics, so only their low 32 bits take part.
+{ uint32_t x61; uint8_t x62 = _addcarryx_u32(0x0, x5, x33, &x61); // add chain starts with carry-in 0; x61 is the sum word, x62 the carry passed to the next line
+{ uint32_t x64; uint8_t x65 = _addcarryx_u32(x62, x7, x35, &x64);
+{ uint32_t x67; uint8_t x68 = _addcarryx_u32(x65, x9, x37, &x67);
+{ uint32_t x70; uint8_t x71 = _addcarryx_u32(x68, x11, x39, &x70);
+{ uint32_t x73; uint8_t x74 = _addcarryx_u32(x71, x13, x41, &x73);
+{ uint32_t x76; uint8_t x77 = _addcarryx_u32(x74, x15, x43, &x76);
+{ uint32_t x79; uint8_t x80 = _addcarryx_u32(x77, x17, x45, &x79);
+{ uint32_t x82; uint8_t x83 = _addcarryx_u32(x80, x19, x47, &x82);
+{ uint32_t x85; uint8_t x86 = _addcarryx_u32(x83, x21, x49, &x85);
+{ uint32_t x88; uint8_t x89 = _addcarryx_u32(x86, x23, x51, &x88);
+{ uint32_t x91; uint8_t x92 = _addcarryx_u32(x89, x25, x53, &x91);
+{ uint32_t x94; uint8_t x95 = _addcarryx_u32(x92, x27, x55, &x94);
+{ uint32_t x97; uint8_t x98 = _addcarryx_u32(x95, x29, x57, &x97);
+{ uint32_t x100; uint8_t x101 = _addcarryx_u32(x98, x31, x59, &x100);
+{ uint32_t x103; uint8_t x104 = _addcarryx_u32(x101, x30, x58, &x103); // x104 is the carry out of the last word of the addition
+{ uint32_t x106; uint8_t x107 = _subborrow_u32(0x0, x61, 0xffffffff, &x106); // subtract chain starts with borrow-in 0; the constants on these 15 lines are the 32-bit words, least significant first, of 2^480 - 2^240 - 1
+{ uint32_t x109; uint8_t x110 = _subborrow_u32(x107, x64, 0xffffffff, &x109);
+{ uint32_t x112; uint8_t x113 = _subborrow_u32(x110, x67, 0xffffffff, &x112);
+{ uint32_t x115; uint8_t x116 = _subborrow_u32(x113, x70, 0xffffffff, &x115);
+{ uint32_t x118; uint8_t x119 = _subborrow_u32(x116, x73, 0xffffffff, &x118);
+{ uint32_t x121; uint8_t x122 = _subborrow_u32(x119, x76, 0xffffffff, &x121);
+{ uint32_t x124; uint8_t x125 = _subborrow_u32(x122, x79, 0xffffffff, &x124);
+{ uint32_t x127; uint8_t x128 = _subborrow_u32(x125, x82, 0xfffeffff, &x127); // 0xfffeffff: the -2^240 term clears bit 16 of this word (240 = 7*32 + 16)
+{ uint32_t x130; uint8_t x131 = _subborrow_u32(x128, x85, 0xffffffff, &x130);
+{ uint32_t x133; uint8_t x134 = _subborrow_u32(x131, x88, 0xffffffff, &x133);
+{ uint32_t x136; uint8_t x137 = _subborrow_u32(x134, x91, 0xffffffff, &x136);
+{ uint32_t x139; uint8_t x140 = _subborrow_u32(x137, x94, 0xffffffff, &x139);
+{ uint32_t x142; uint8_t x143 = _subborrow_u32(x140, x97, 0xffffffff, &x142);
+{ uint32_t x145; uint8_t x146 = _subborrow_u32(x143, x100, 0xffffffff, &x145);
+{ uint32_t x148; uint8_t x149 = _subborrow_u32(x146, x103, 0xffffffff, &x148); // top word is full because 480 = 15*32 exactly
+{ uint32_t _; uint8_t x152 = _subborrow_u32(x149, x104, 0x0, &_); // carries the borrow through the add-chain carry x104; the difference is discarded into `_`, only the final borrow x152 is kept
+{ uint32_t x153 = cmovznz(x152, x148, x103); // x152 drives every selection below. NOTE(review): cmovznz comes from liblow.h — presumably yields the subtracted word when x152 == 0 and the raw sum word otherwise; confirm there
+{ uint32_t x154 = cmovznz(x152, x145, x100);
+{ uint32_t x155 = cmovznz(x152, x142, x97);
+{ uint32_t x156 = cmovznz(x152, x139, x94);
+{ uint32_t x157 = cmovznz(x152, x136, x91);
+{ uint32_t x158 = cmovznz(x152, x133, x88);
+{ uint32_t x159 = cmovznz(x152, x130, x85);
+{ uint32_t x160 = cmovznz(x152, x127, x82);
+{ uint32_t x161 = cmovznz(x152, x124, x79);
+{ uint32_t x162 = cmovznz(x152, x121, x76);
+{ uint32_t x163 = cmovznz(x152, x118, x73);
+{ uint32_t x164 = cmovznz(x152, x115, x70);
+{ uint32_t x165 = cmovznz(x152, x112, x67);
+{ uint32_t x166 = cmovznz(x152, x109, x64);
+{ uint32_t x167 = cmovznz(x152, x106, x61);
+out[0] = x153; // out[0] takes the word handled last in both chains (x103/x148); out[14] takes the one handled first (x61/x106)
+out[1] = x154;
+out[2] = x155;
+out[3] = x156;
+out[4] = x157;
+out[5] = x158;
+out[6] = x159;
+out[7] = x160;
+out[8] = x161;
+out[9] = x162;
+out[10] = x163;
+out[11] = x164;
+out[12] = x165;
+out[13] = x166;
+out[14] = x167;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[15];
diff --git a/src/Specific/montgomery32_2e480m2e240m1/feadd.h b/src/Specific/montgomery32_2e480m2e240m1/feadd.h
new file mode 100644
index 000000000..8f380a44a
--- /dev/null
+++ b/src/Specific/montgomery32_2e480m2e240m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33);
diff --git a/src/Specific/montgomery32_2e480m2e240m1/fenz.c b/src/Specific/montgomery32_2e480m2e240m1/fenz.c
new file mode 100644
index 000000000..76004e346
--- /dev/null
+++ b/src/Specific/montgomery32_2e480m2e240m1/fenz.c
@@ -0,0 +1,36 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the 15 input words together and stores the single result in out[0]. NOTE(review): the name suggests a "field element nonzero" test — confirm against callers.
+{ uint32_t x29 = (x28 | x27); // the OR is evaluated on the uint64_t operands, then narrowed to uint32_t by the initializer, so bits 32..63 are dropped
+{ uint32_t x30 = (x26 | x29); // each following line folds one more input into the running 32-bit OR
+{ uint32_t x31 = (x24 | x30);
+{ uint32_t x32 = (x22 | x31);
+{ uint32_t x33 = (x20 | x32);
+{ uint32_t x34 = (x18 | x33);
+{ uint32_t x35 = (x16 | x34);
+{ uint32_t x36 = (x14 | x35);
+{ uint32_t x37 = (x12 | x36);
+{ uint32_t x38 = (x10 | x37);
+{ uint32_t x39 = (x8 | x38);
+{ uint32_t x40 = (x6 | x39);
+{ uint32_t x41 = (x4 | x40);
+{ uint32_t x42 = (x2 | x41);
+out[0] = x42; // nonzero iff at least one input has a bit set in its low 32 bits; widened back to uint64_t on store
+}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e480m2e240m1/fenz.h b/src/Specific/montgomery32_2e480m2e240m1/fenz.h
new file mode 100644
index 000000000..e0ceb4efe
--- /dev/null
+++ b/src/Specific/montgomery32_2e480m2e240m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery32_2e488m17/feadd.c b/src/Specific/montgomery32_2e488m17/feadd.c
new file mode 100644
index 000000000..f13bcd9d5
--- /dev/null
+++ b/src/Specific/montgomery32_2e488m17/feadd.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // feadd: adds the 16-limb operands (x32..x5) and (x62..x35) with a 32-bit carry chain, trial-subtracts the constant 2^488 - 17, then picks per limb between difference and sum with cmovznz; writes 16 limbs to out[]. Each operand is listed most-significant limb first (x5/x35 enter the carry chain first, x32/x62 last).
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65); // least-significant limbs, carry-in 0; NOTE(review): uint64_t arguments are passed to a 32-bit intrinsic, so presumably only their low 32 bits take part - confirm callers pass 32-bit limbs
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110); // most-significant limbs; x111 is the carry out of the whole 16-limb sum
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffef, &x113); // borrow chain starts here: the subtrahend limbs (0xffffffef, fourteen 0xffffffff, 0xff) spell 2^488 - 17, matching the directory name
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0xff, &x158); // top limb of the constant: only 8 bits are set
+{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_); // extends the subtraction over the carry x111; the difference word is discarded into `_`, only the final borrow x162 is kept
+{ uint32_t x163 = cmovznz(x162, x158, x110); // x162 is the selector for all 16 limbs; cmovznz comes from liblow.h - presumably yields the 2nd argument (difference) when x162 == 0 and the 3rd (raw sum) otherwise, TODO confirm
+{ uint32_t x164 = cmovznz(x162, x155, x107);
+{ uint32_t x165 = cmovznz(x162, x152, x104);
+{ uint32_t x166 = cmovznz(x162, x149, x101);
+{ uint32_t x167 = cmovznz(x162, x146, x98);
+{ uint32_t x168 = cmovznz(x162, x143, x95);
+{ uint32_t x169 = cmovznz(x162, x140, x92);
+{ uint32_t x170 = cmovznz(x162, x137, x89);
+{ uint32_t x171 = cmovznz(x162, x134, x86);
+{ uint32_t x172 = cmovznz(x162, x131, x83);
+{ uint32_t x173 = cmovznz(x162, x128, x80);
+{ uint32_t x174 = cmovznz(x162, x125, x77);
+{ uint32_t x175 = cmovznz(x162, x122, x74);
+{ uint32_t x176 = cmovznz(x162, x119, x71);
+{ uint32_t x177 = cmovznz(x162, x116, x68);
+{ uint32_t x178 = cmovznz(x162, x113, x65);
+out[0] = x163; // out[0] receives the limb derived from the last addition (x110), i.e. the most-significant limb
+out[1] = x164;
+out[2] = x165;
+out[3] = x166;
+out[4] = x167;
+out[5] = x168;
+out[6] = x169;
+out[7] = x170;
+out[8] = x171;
+out[9] = x172;
+out[10] = x173;
+out[11] = x174;
+out[12] = x175;
+out[13] = x176;
+out[14] = x177;
+out[15] = x178; // out[15] receives the limb derived from the first addition (x65), i.e. the least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/montgomery32_2e488m17/feadd.h b/src/Specific/montgomery32_2e488m17/feadd.h
new file mode 100644
index 000000000..b80152c31
--- /dev/null
+++ b/src/Specific/montgomery32_2e488m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // Prototype of feadd: one output pointer plus two groups of 16 uint64_t limb arguments (x32..x5, then x62..x35), no return value. NOTE(review): force_inline is always_inline on a declaration with no body in this header - confirm the intended inclusion/inlining model.
diff --git a/src/Specific/montgomery32_2e488m17/fenz.c b/src/Specific/montgomery32_2e488m17/fenz.c
new file mode 100644
index 000000000..9290e38c5
--- /dev/null
+++ b/src/Specific/montgomery32_2e488m17/fenz.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: OR-folds all 16 limb arguments into one word and stores it in out[0]; out[0] != 0 exactly when some argument has a bit set in its low 32 bits.
+{ uint32_t x31 = (x30 | x29); // the 64-bit OR is narrowed to uint32_t on assignment, so bits 32..63 of the arguments are discarded
+{ uint32_t x32 = (x28 | x31);
+{ uint32_t x33 = (x26 | x32);
+{ uint32_t x34 = (x24 | x33);
+{ uint32_t x35 = (x22 | x34);
+{ uint32_t x36 = (x20 | x35);
+{ uint32_t x37 = (x18 | x36);
+{ uint32_t x38 = (x16 | x37);
+{ uint32_t x39 = (x14 | x38);
+{ uint32_t x40 = (x12 | x39);
+{ uint32_t x41 = (x10 | x40);
+{ uint32_t x42 = (x8 | x41);
+{ uint32_t x43 = (x6 | x42);
+{ uint32_t x44 = (x4 | x43);
+{ uint32_t x45 = (x2 | x44); // x45 now holds the low 32 bits of the OR of every argument
+out[0] = x45; // zero-extended into the caller's uint64_t slot; only out[0] is written
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e488m17/fenz.h b/src/Specific/montgomery32_2e488m17/fenz.h
new file mode 100644
index 000000000..941e49edd
--- /dev/null
+++ b/src/Specific/montgomery32_2e488m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: one output pointer plus 16 uint64_t limb arguments, no return value. NOTE(review): force_inline is always_inline on a declaration with no body in this header - confirm the intended inclusion/inlining model.
diff --git a/src/Specific/montgomery32_2e489m21/feadd.c b/src/Specific/montgomery32_2e489m21/feadd.c
new file mode 100644
index 000000000..88920f35e
--- /dev/null
+++ b/src/Specific/montgomery32_2e489m21/feadd.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // feadd: adds the 16-limb operands (x32..x5) and (x62..x35) with a 32-bit carry chain, trial-subtracts the constant 2^489 - 21, then picks per limb between difference and sum with cmovznz; writes 16 limbs to out[]. Each operand is listed most-significant limb first (x5/x35 enter the carry chain first, x32/x62 last).
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65); // least-significant limbs, carry-in 0; NOTE(review): uint64_t arguments are passed to a 32-bit intrinsic, so presumably only their low 32 bits take part - confirm callers pass 32-bit limbs
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110); // most-significant limbs; x111 is the carry out of the whole 16-limb sum
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffeb, &x113); // borrow chain starts here: the subtrahend limbs (0xffffffeb, fourteen 0xffffffff, 0x1ff) spell 2^489 - 21, matching the directory name
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x1ff, &x158); // top limb of the constant: only 9 bits are set
+{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_); // extends the subtraction over the carry x111; the difference word is discarded into `_`, only the final borrow x162 is kept
+{ uint32_t x163 = cmovznz(x162, x158, x110); // x162 is the selector for all 16 limbs; cmovznz comes from liblow.h - presumably yields the 2nd argument (difference) when x162 == 0 and the 3rd (raw sum) otherwise, TODO confirm
+{ uint32_t x164 = cmovznz(x162, x155, x107);
+{ uint32_t x165 = cmovznz(x162, x152, x104);
+{ uint32_t x166 = cmovznz(x162, x149, x101);
+{ uint32_t x167 = cmovznz(x162, x146, x98);
+{ uint32_t x168 = cmovznz(x162, x143, x95);
+{ uint32_t x169 = cmovznz(x162, x140, x92);
+{ uint32_t x170 = cmovznz(x162, x137, x89);
+{ uint32_t x171 = cmovznz(x162, x134, x86);
+{ uint32_t x172 = cmovznz(x162, x131, x83);
+{ uint32_t x173 = cmovznz(x162, x128, x80);
+{ uint32_t x174 = cmovznz(x162, x125, x77);
+{ uint32_t x175 = cmovznz(x162, x122, x74);
+{ uint32_t x176 = cmovznz(x162, x119, x71);
+{ uint32_t x177 = cmovznz(x162, x116, x68);
+{ uint32_t x178 = cmovznz(x162, x113, x65);
+out[0] = x163; // out[0] receives the limb derived from the last addition (x110), i.e. the most-significant limb
+out[1] = x164;
+out[2] = x165;
+out[3] = x166;
+out[4] = x167;
+out[5] = x168;
+out[6] = x169;
+out[7] = x170;
+out[8] = x171;
+out[9] = x172;
+out[10] = x173;
+out[11] = x174;
+out[12] = x175;
+out[13] = x176;
+out[14] = x177;
+out[15] = x178; // out[15] receives the limb derived from the first addition (x65), i.e. the least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/montgomery32_2e489m21/feadd.h b/src/Specific/montgomery32_2e489m21/feadd.h
new file mode 100644
index 000000000..b80152c31
--- /dev/null
+++ b/src/Specific/montgomery32_2e489m21/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // Prototype of feadd: one output pointer plus two groups of 16 uint64_t limb arguments (x32..x5, then x62..x35), no return value. NOTE(review): force_inline is always_inline on a declaration with no body in this header - confirm the intended inclusion/inlining model.
diff --git a/src/Specific/montgomery32_2e489m21/fenz.c b/src/Specific/montgomery32_2e489m21/fenz.c
new file mode 100644
index 000000000..9290e38c5
--- /dev/null
+++ b/src/Specific/montgomery32_2e489m21/fenz.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: OR-folds all 16 limb arguments into one word and stores it in out[0]; out[0] != 0 exactly when some argument has a bit set in its low 32 bits.
+{ uint32_t x31 = (x30 | x29); // the 64-bit OR is narrowed to uint32_t on assignment, so bits 32..63 of the arguments are discarded
+{ uint32_t x32 = (x28 | x31);
+{ uint32_t x33 = (x26 | x32);
+{ uint32_t x34 = (x24 | x33);
+{ uint32_t x35 = (x22 | x34);
+{ uint32_t x36 = (x20 | x35);
+{ uint32_t x37 = (x18 | x36);
+{ uint32_t x38 = (x16 | x37);
+{ uint32_t x39 = (x14 | x38);
+{ uint32_t x40 = (x12 | x39);
+{ uint32_t x41 = (x10 | x40);
+{ uint32_t x42 = (x8 | x41);
+{ uint32_t x43 = (x6 | x42);
+{ uint32_t x44 = (x4 | x43);
+{ uint32_t x45 = (x2 | x44); // x45 now holds the low 32 bits of the OR of every argument
+out[0] = x45; // zero-extended into the caller's uint64_t slot; only out[0] is written
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e489m21/fenz.h b/src/Specific/montgomery32_2e489m21/fenz.h
new file mode 100644
index 000000000..941e49edd
--- /dev/null
+++ b/src/Specific/montgomery32_2e489m21/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: one output pointer plus 16 uint64_t limb arguments, no return value. NOTE(review): force_inline is always_inline on a declaration with no body in this header - confirm the intended inclusion/inlining model.
diff --git a/src/Specific/montgomery32_2e495m31/feadd.c b/src/Specific/montgomery32_2e495m31/feadd.c
new file mode 100644
index 000000000..ebc163343
--- /dev/null
+++ b/src/Specific/montgomery32_2e495m31/feadd.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // feadd: adds the 16-limb operands (x32..x5) and (x62..x35) with a 32-bit carry chain, trial-subtracts the constant 2^495 - 31, then picks per limb between difference and sum with cmovznz; writes 16 limbs to out[]. Each operand is listed most-significant limb first (x5/x35 enter the carry chain first, x32/x62 last).
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65); // least-significant limbs, carry-in 0; NOTE(review): uint64_t arguments are passed to a 32-bit intrinsic, so presumably only their low 32 bits take part - confirm callers pass 32-bit limbs
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110); // most-significant limbs; x111 is the carry out of the whole 16-limb sum
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffffe1, &x113); // borrow chain starts here: the subtrahend limbs (0xffffffe1, fourteen 0xffffffff, 0x7fff) spell 2^495 - 31, matching the directory name
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fff, &x158); // top limb of the constant: only 15 bits are set
+{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_); // extends the subtraction over the carry x111; the difference word is discarded into `_`, only the final borrow x162 is kept
+{ uint32_t x163 = cmovznz(x162, x158, x110); // x162 is the selector for all 16 limbs; cmovznz comes from liblow.h - presumably yields the 2nd argument (difference) when x162 == 0 and the 3rd (raw sum) otherwise, TODO confirm
+{ uint32_t x164 = cmovznz(x162, x155, x107);
+{ uint32_t x165 = cmovznz(x162, x152, x104);
+{ uint32_t x166 = cmovznz(x162, x149, x101);
+{ uint32_t x167 = cmovznz(x162, x146, x98);
+{ uint32_t x168 = cmovznz(x162, x143, x95);
+{ uint32_t x169 = cmovznz(x162, x140, x92);
+{ uint32_t x170 = cmovznz(x162, x137, x89);
+{ uint32_t x171 = cmovznz(x162, x134, x86);
+{ uint32_t x172 = cmovznz(x162, x131, x83);
+{ uint32_t x173 = cmovznz(x162, x128, x80);
+{ uint32_t x174 = cmovznz(x162, x125, x77);
+{ uint32_t x175 = cmovznz(x162, x122, x74);
+{ uint32_t x176 = cmovznz(x162, x119, x71);
+{ uint32_t x177 = cmovznz(x162, x116, x68);
+{ uint32_t x178 = cmovznz(x162, x113, x65);
+out[0] = x163; // out[0] receives the limb derived from the last addition (x110), i.e. the most-significant limb
+out[1] = x164;
+out[2] = x165;
+out[3] = x166;
+out[4] = x167;
+out[5] = x168;
+out[6] = x169;
+out[7] = x170;
+out[8] = x171;
+out[9] = x172;
+out[10] = x173;
+out[11] = x174;
+out[12] = x175;
+out[13] = x176;
+out[14] = x177;
+out[15] = x178; // out[15] receives the limb derived from the first addition (x65), i.e. the least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/montgomery32_2e495m31/feadd.h b/src/Specific/montgomery32_2e495m31/feadd.h
new file mode 100644
index 000000000..b80152c31
--- /dev/null
+++ b/src/Specific/montgomery32_2e495m31/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // Prototype of feadd: one output pointer plus two groups of 16 uint64_t limb arguments (x32..x5, then x62..x35), no return value. NOTE(review): force_inline is always_inline on a declaration with no body in this header - confirm the intended inclusion/inlining model.
diff --git a/src/Specific/montgomery32_2e495m31/fenz.c b/src/Specific/montgomery32_2e495m31/fenz.c
new file mode 100644
index 000000000..9290e38c5
--- /dev/null
+++ b/src/Specific/montgomery32_2e495m31/fenz.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: OR-folds all 16 limb arguments into one word and stores it in out[0]; out[0] != 0 exactly when some argument has a bit set in its low 32 bits.
+{ uint32_t x31 = (x30 | x29); // the 64-bit OR is narrowed to uint32_t on assignment, so bits 32..63 of the arguments are discarded
+{ uint32_t x32 = (x28 | x31);
+{ uint32_t x33 = (x26 | x32);
+{ uint32_t x34 = (x24 | x33);
+{ uint32_t x35 = (x22 | x34);
+{ uint32_t x36 = (x20 | x35);
+{ uint32_t x37 = (x18 | x36);
+{ uint32_t x38 = (x16 | x37);
+{ uint32_t x39 = (x14 | x38);
+{ uint32_t x40 = (x12 | x39);
+{ uint32_t x41 = (x10 | x40);
+{ uint32_t x42 = (x8 | x41);
+{ uint32_t x43 = (x6 | x42);
+{ uint32_t x44 = (x4 | x43);
+{ uint32_t x45 = (x2 | x44); // x45 now holds the low 32 bits of the OR of every argument
+out[0] = x45; // zero-extended into the caller's uint64_t slot; only out[0] is written
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e495m31/fenz.h b/src/Specific/montgomery32_2e495m31/fenz.h
new file mode 100644
index 000000000..941e49edd
--- /dev/null
+++ b/src/Specific/montgomery32_2e495m31/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: one output pointer plus 16 uint64_t limb arguments, no return value. NOTE(review): force_inline is always_inline on a declaration with no body in this header - confirm the intended inclusion/inlining model.
diff --git a/src/Specific/montgomery32_2e510m290x2e496m1/fenz.c b/src/Specific/montgomery32_2e510m290x2e496m1/fenz.c
new file mode 100644
index 000000000..9290e38c5
--- /dev/null
+++ b/src/Specific/montgomery32_2e510m290x2e496m1/fenz.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: OR-folds all 16 limb arguments into one word and stores it in out[0]; out[0] != 0 exactly when some argument has a bit set in its low 32 bits.
+{ uint32_t x31 = (x30 | x29); // the 64-bit OR is narrowed to uint32_t on assignment, so bits 32..63 of the arguments are discarded
+{ uint32_t x32 = (x28 | x31);
+{ uint32_t x33 = (x26 | x32);
+{ uint32_t x34 = (x24 | x33);
+{ uint32_t x35 = (x22 | x34);
+{ uint32_t x36 = (x20 | x35);
+{ uint32_t x37 = (x18 | x36);
+{ uint32_t x38 = (x16 | x37);
+{ uint32_t x39 = (x14 | x38);
+{ uint32_t x40 = (x12 | x39);
+{ uint32_t x41 = (x10 | x40);
+{ uint32_t x42 = (x8 | x41);
+{ uint32_t x43 = (x6 | x42);
+{ uint32_t x44 = (x4 | x43);
+{ uint32_t x45 = (x2 | x44); // x45 now holds the low 32 bits of the OR of every argument
+out[0] = x45; // zero-extended into the caller's uint64_t slot; only out[0] is written
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e510m290x2e496m1/fenz.h b/src/Specific/montgomery32_2e510m290x2e496m1/fenz.h
new file mode 100644
index 000000000..941e49edd
--- /dev/null
+++ b/src/Specific/montgomery32_2e510m290x2e496m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fenz: one output pointer plus 16 uint64_t limb arguments, no return value. NOTE(review): force_inline is always_inline on a declaration with no body in this header - confirm the intended inclusion/inlining model.
diff --git a/src/Specific/montgomery32_2e511m187/feadd.c b/src/Specific/montgomery32_2e511m187/feadd.c
new file mode 100644
index 000000000..9dd18938f
--- /dev/null
+++ b/src/Specific/montgomery32_2e511m187/feadd.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // feadd: adds the 16-limb operands (x32..x5) and (x62..x35) with a 32-bit carry chain, trial-subtracts the constant 2^511 - 187, then picks per limb between difference and sum with cmovznz; writes 16 limbs to out[]. Each operand is listed most-significant limb first (x5/x35 enter the carry chain first, x32/x62 last).
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65); // least-significant limbs, carry-in 0; NOTE(review): uint64_t arguments are passed to a 32-bit intrinsic, so presumably only their low 32 bits take part - confirm callers pass 32-bit limbs
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68);
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110); // most-significant limbs; x111 is the carry out of the whole 16-limb sum
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xffffff45, &x113); // borrow chain starts here: the subtrahend limbs (0xffffff45, fourteen 0xffffffff, 0x7fffffff) spell 2^511 - 187, matching the directory name
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fffffff, &x158); // top limb of the constant: only 31 bits are set
+{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_); // extends the subtraction over the carry x111; the difference word is discarded into `_`, only the final borrow x162 is kept
+{ uint32_t x163 = cmovznz(x162, x158, x110); // x162 is the selector for all 16 limbs; cmovznz comes from liblow.h - presumably yields the 2nd argument (difference) when x162 == 0 and the 3rd (raw sum) otherwise, TODO confirm
+{ uint32_t x164 = cmovznz(x162, x155, x107);
+{ uint32_t x165 = cmovznz(x162, x152, x104);
+{ uint32_t x166 = cmovznz(x162, x149, x101);
+{ uint32_t x167 = cmovznz(x162, x146, x98);
+{ uint32_t x168 = cmovznz(x162, x143, x95);
+{ uint32_t x169 = cmovznz(x162, x140, x92);
+{ uint32_t x170 = cmovznz(x162, x137, x89);
+{ uint32_t x171 = cmovznz(x162, x134, x86);
+{ uint32_t x172 = cmovznz(x162, x131, x83);
+{ uint32_t x173 = cmovznz(x162, x128, x80);
+{ uint32_t x174 = cmovznz(x162, x125, x77);
+{ uint32_t x175 = cmovznz(x162, x122, x74);
+{ uint32_t x176 = cmovznz(x162, x119, x71);
+{ uint32_t x177 = cmovznz(x162, x116, x68);
+{ uint32_t x178 = cmovznz(x162, x113, x65);
+out[0] = x163; // out[0] receives the limb derived from the last addition (x110), i.e. the most-significant limb
+out[1] = x164;
+out[2] = x165;
+out[3] = x166;
+out[4] = x167;
+out[5] = x168;
+out[6] = x169;
+out[7] = x170;
+out[8] = x171;
+out[9] = x172;
+out[10] = x173;
+out[11] = x174;
+out[12] = x175;
+out[13] = x176;
+out[14] = x177;
+out[15] = x178; // out[15] receives the limb derived from the first addition (x65), i.e. the least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/montgomery32_2e511m187/feadd.h b/src/Specific/montgomery32_2e511m187/feadd.h
new file mode 100644
index 000000000..b80152c31
--- /dev/null
+++ b/src/Specific/montgomery32_2e511m187/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // Prototype of feadd: one output pointer plus two groups of 16 uint64_t limb arguments (x32..x5, then x62..x35), no return value. NOTE(review): force_inline is always_inline on a declaration with no body in this header - confirm the intended inclusion/inlining model.
diff --git a/src/Specific/montgomery32_2e511m187/fenz.c b/src/Specific/montgomery32_2e511m187/fenz.c
new file mode 100644
index 000000000..9290e38c5
--- /dev/null
+++ b/src/Specific/montgomery32_2e511m187/fenz.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: OR-folds all 16 limb arguments into one word and stores it in out[0]; out[0] != 0 exactly when some argument has a bit set in its low 32 bits.
+{ uint32_t x31 = (x30 | x29); // the 64-bit OR is narrowed to uint32_t on assignment, so bits 32..63 of the arguments are discarded
+{ uint32_t x32 = (x28 | x31);
+{ uint32_t x33 = (x26 | x32);
+{ uint32_t x34 = (x24 | x33);
+{ uint32_t x35 = (x22 | x34);
+{ uint32_t x36 = (x20 | x35);
+{ uint32_t x37 = (x18 | x36);
+{ uint32_t x38 = (x16 | x37);
+{ uint32_t x39 = (x14 | x38);
+{ uint32_t x40 = (x12 | x39);
+{ uint32_t x41 = (x10 | x40);
+{ uint32_t x42 = (x8 | x41);
+{ uint32_t x43 = (x6 | x42);
+{ uint32_t x44 = (x4 | x43);
+{ uint32_t x45 = (x2 | x44); // x45 now holds the low 32 bits of the OR of every argument
+out[0] = x45; // zero-extended into the caller's uint64_t slot; only out[0] is written
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e511m187/fenz.h b/src/Specific/montgomery32_2e511m187/fenz.h
new file mode 100644
index 000000000..941e49edd
--- /dev/null
+++ b/src/Specific/montgomery32_2e511m187/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz prototype: `out` plus 16 limb arguments; the definition in fenz.c stores their combined OR in out[0]
diff --git a/src/Specific/montgomery32_2e511m481/feadd.c b/src/Specific/montgomery32_2e511m481/feadd.c
new file mode 100644
index 000000000..42b332afd
--- /dev/null
+++ b/src/Specific/montgomery32_2e511m481/feadd.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65); // feadd: 16-limb add with carry, lowest pair (x5, x35) first, then a trial subtraction of the constant below
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68); // NOTE(review): operands are uint64_t but the _u32 intrinsic works on 32-bit values - presumably callers pass 32-bit limbs; confirm
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110); // highest pair (x32, x62); x111 is the carry out of the whole 16-limb sum
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xfffffe1f, &x113); // trial subtraction, lowest limb first: 0xfffffe1f = 2^32 - 481
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0x7fffffff, &x158); // top constant limb; the 16 constants together encode 2^511 - 481
+{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_); // fold the addition carry x111 into the borrow chain; difference discarded, x162 is the final borrow
+{ uint32_t x163 = cmovznz(x162, x158, x110); // per-limb select between trial difference and raw sum, keyed on x162; presumably cmovznz(c, a, b) yields a when c == 0 else b (liblow.h, not shown)
+{ uint32_t x164 = cmovznz(x162, x155, x107);
+{ uint32_t x165 = cmovznz(x162, x152, x104);
+{ uint32_t x166 = cmovznz(x162, x149, x101);
+{ uint32_t x167 = cmovznz(x162, x146, x98);
+{ uint32_t x168 = cmovznz(x162, x143, x95);
+{ uint32_t x169 = cmovznz(x162, x140, x92);
+{ uint32_t x170 = cmovznz(x162, x137, x89);
+{ uint32_t x171 = cmovznz(x162, x134, x86);
+{ uint32_t x172 = cmovznz(x162, x131, x83);
+{ uint32_t x173 = cmovznz(x162, x128, x80);
+{ uint32_t x174 = cmovznz(x162, x125, x77);
+{ uint32_t x175 = cmovznz(x162, x122, x74);
+{ uint32_t x176 = cmovznz(x162, x119, x71);
+{ uint32_t x177 = cmovznz(x162, x116, x68);
+{ uint32_t x178 = cmovznz(x162, x113, x65);
+out[0] = x163; // out[] is filled highest limb first: x163 derives from the top sum limb x110
+out[1] = x164;
+out[2] = x165;
+out[3] = x166;
+out[4] = x167;
+out[5] = x168;
+out[6] = x169;
+out[7] = x170;
+out[8] = x171;
+out[9] = x172;
+out[10] = x173;
+out[11] = x174;
+out[12] = x175;
+out[13] = x176;
+out[14] = x177;
+out[15] = x178; // lowest limb (derives from x65)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/montgomery32_2e511m481/feadd.h b/src/Specific/montgomery32_2e511m481/feadd.h
new file mode 100644
index 000000000..b80152c31
--- /dev/null
+++ b/src/Specific/montgomery32_2e511m481/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // feadd prototype: `out` (16 result limbs per feadd.c) plus two 16-limb operands passed as 32 scalars, highest limb first within each operand
diff --git a/src/Specific/montgomery32_2e511m481/feaddDisplay.log b/src/Specific/montgomery32_2e511m481/feaddDisplay.log
index 25b5b30b8..5e2a7b1b6 100644
--- a/src/Specific/montgomery32_2e511m481/feaddDisplay.log
+++ b/src/Specific/montgomery32_2e511m481/feaddDisplay.log
@@ -18,7 +18,7 @@ Interp-η
uint32_t x104, uint8_t x105 = addcarryx_u32(x102, x31, x61);
uint32_t x107, uint8_t x108 = addcarryx_u32(x105, x33, x63);
uint32_t x110, uint8_t x111 = addcarryx_u32(x108, x32, x62);
- uint32_t x113, uint8_t x114 = subborrow_u32(0x0, x65, Const 4294966815);
+ uint32_t x113, uint8_t x114 = subborrow_u32(0x0, x65, 0xfffffe1f);
uint32_t x116, uint8_t x117 = subborrow_u32(x114, x68, 0xffffffff);
uint32_t x119, uint8_t x120 = subborrow_u32(x117, x71, 0xffffffff);
uint32_t x122, uint8_t x123 = subborrow_u32(x120, x74, 0xffffffff);
diff --git a/src/Specific/montgomery32_2e511m481/fenz.c b/src/Specific/montgomery32_2e511m481/fenz.c
new file mode 100644
index 000000000..9290e38c5
--- /dev/null
+++ b/src/Specific/montgomery32_2e511m481/fenz.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t x31 = (x30 | x29); // fenz: OR-fold all 16 arguments into one word; every step narrows the uint64_t OR to uint32_t
+{ uint32_t x32 = (x28 | x31); // NOTE(review): bits above bit 31 of any argument are dropped by the narrowing - presumably each argument is a 32-bit limb; confirm
+{ uint32_t x33 = (x26 | x32);
+{ uint32_t x34 = (x24 | x33);
+{ uint32_t x35 = (x22 | x34);
+{ uint32_t x36 = (x20 | x35);
+{ uint32_t x37 = (x18 | x36);
+{ uint32_t x38 = (x16 | x37);
+{ uint32_t x39 = (x14 | x38);
+{ uint32_t x40 = (x12 | x39);
+{ uint32_t x41 = (x10 | x40);
+{ uint32_t x42 = (x8 | x41);
+{ uint32_t x43 = (x6 | x42);
+{ uint32_t x44 = (x4 | x43);
+{ uint32_t x45 = (x2 | x44); // x45 == 0 exactly when the low 32 bits of every argument are 0
+out[0] = x45; // only out[0] is written; the 32-bit value is zero-extended to uint64_t
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e511m481/fenz.h b/src/Specific/montgomery32_2e511m481/fenz.h
new file mode 100644
index 000000000..941e49edd
--- /dev/null
+++ b/src/Specific/montgomery32_2e511m481/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz prototype: `out` plus 16 limb arguments; the definition in fenz.c stores their combined OR in out[0]
diff --git a/src/Specific/montgomery32_2e512m491x2e496m1/fenz.c b/src/Specific/montgomery32_2e512m491x2e496m1/fenz.c
new file mode 100644
index 000000000..9290e38c5
--- /dev/null
+++ b/src/Specific/montgomery32_2e512m491x2e496m1/fenz.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t x31 = (x30 | x29); // fenz: OR-fold all 16 arguments into one word; every step narrows the uint64_t OR to uint32_t
+{ uint32_t x32 = (x28 | x31); // NOTE(review): bits above bit 31 of any argument are dropped by the narrowing - presumably each argument is a 32-bit limb; confirm
+{ uint32_t x33 = (x26 | x32);
+{ uint32_t x34 = (x24 | x33);
+{ uint32_t x35 = (x22 | x34);
+{ uint32_t x36 = (x20 | x35);
+{ uint32_t x37 = (x18 | x36);
+{ uint32_t x38 = (x16 | x37);
+{ uint32_t x39 = (x14 | x38);
+{ uint32_t x40 = (x12 | x39);
+{ uint32_t x41 = (x10 | x40);
+{ uint32_t x42 = (x8 | x41);
+{ uint32_t x43 = (x6 | x42);
+{ uint32_t x44 = (x4 | x43);
+{ uint32_t x45 = (x2 | x44); // x45 == 0 exactly when the low 32 bits of every argument are 0
+out[0] = x45; // only out[0] is written; the 32-bit value is zero-extended to uint64_t
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e512m491x2e496m1/fenz.h b/src/Specific/montgomery32_2e512m491x2e496m1/fenz.h
new file mode 100644
index 000000000..941e49edd
--- /dev/null
+++ b/src/Specific/montgomery32_2e512m491x2e496m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz prototype: `out` plus 16 limb arguments; the definition in fenz.c stores their combined OR in out[0]
diff --git a/src/Specific/montgomery32_2e512m569/feadd.c b/src/Specific/montgomery32_2e512m569/feadd.c
new file mode 100644
index 000000000..261722f4c
--- /dev/null
+++ b/src/Specific/montgomery32_2e512m569/feadd.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
+{ uint32_t x65; uint8_t x66 = _addcarryx_u32(0x0, x5, x35, &x65); // feadd: 16-limb add with carry, lowest pair (x5, x35) first, then a trial subtraction of the constant below
+{ uint32_t x68; uint8_t x69 = _addcarryx_u32(x66, x7, x37, &x68); // NOTE(review): operands are uint64_t but the _u32 intrinsic works on 32-bit values - presumably callers pass 32-bit limbs; confirm
+{ uint32_t x71; uint8_t x72 = _addcarryx_u32(x69, x9, x39, &x71);
+{ uint32_t x74; uint8_t x75 = _addcarryx_u32(x72, x11, x41, &x74);
+{ uint32_t x77; uint8_t x78 = _addcarryx_u32(x75, x13, x43, &x77);
+{ uint32_t x80; uint8_t x81 = _addcarryx_u32(x78, x15, x45, &x80);
+{ uint32_t x83; uint8_t x84 = _addcarryx_u32(x81, x17, x47, &x83);
+{ uint32_t x86; uint8_t x87 = _addcarryx_u32(x84, x19, x49, &x86);
+{ uint32_t x89; uint8_t x90 = _addcarryx_u32(x87, x21, x51, &x89);
+{ uint32_t x92; uint8_t x93 = _addcarryx_u32(x90, x23, x53, &x92);
+{ uint32_t x95; uint8_t x96 = _addcarryx_u32(x93, x25, x55, &x95);
+{ uint32_t x98; uint8_t x99 = _addcarryx_u32(x96, x27, x57, &x98);
+{ uint32_t x101; uint8_t x102 = _addcarryx_u32(x99, x29, x59, &x101);
+{ uint32_t x104; uint8_t x105 = _addcarryx_u32(x102, x31, x61, &x104);
+{ uint32_t x107; uint8_t x108 = _addcarryx_u32(x105, x33, x63, &x107);
+{ uint32_t x110; uint8_t x111 = _addcarryx_u32(x108, x32, x62, &x110); // highest pair (x32, x62); x111 is the carry out of the whole 16-limb sum
+{ uint32_t x113; uint8_t x114 = _subborrow_u32(0x0, x65, 0xfffffdc7, &x113); // trial subtraction, lowest limb first: 0xfffffdc7 = 2^32 - 569
+{ uint32_t x116; uint8_t x117 = _subborrow_u32(x114, x68, 0xffffffff, &x116);
+{ uint32_t x119; uint8_t x120 = _subborrow_u32(x117, x71, 0xffffffff, &x119);
+{ uint32_t x122; uint8_t x123 = _subborrow_u32(x120, x74, 0xffffffff, &x122);
+{ uint32_t x125; uint8_t x126 = _subborrow_u32(x123, x77, 0xffffffff, &x125);
+{ uint32_t x128; uint8_t x129 = _subborrow_u32(x126, x80, 0xffffffff, &x128);
+{ uint32_t x131; uint8_t x132 = _subborrow_u32(x129, x83, 0xffffffff, &x131);
+{ uint32_t x134; uint8_t x135 = _subborrow_u32(x132, x86, 0xffffffff, &x134);
+{ uint32_t x137; uint8_t x138 = _subborrow_u32(x135, x89, 0xffffffff, &x137);
+{ uint32_t x140; uint8_t x141 = _subborrow_u32(x138, x92, 0xffffffff, &x140);
+{ uint32_t x143; uint8_t x144 = _subborrow_u32(x141, x95, 0xffffffff, &x143);
+{ uint32_t x146; uint8_t x147 = _subborrow_u32(x144, x98, 0xffffffff, &x146);
+{ uint32_t x149; uint8_t x150 = _subborrow_u32(x147, x101, 0xffffffff, &x149);
+{ uint32_t x152; uint8_t x153 = _subborrow_u32(x150, x104, 0xffffffff, &x152);
+{ uint32_t x155; uint8_t x156 = _subborrow_u32(x153, x107, 0xffffffff, &x155);
+{ uint32_t x158; uint8_t x159 = _subborrow_u32(x156, x110, 0xffffffff, &x158); // top constant limb; the 16 constants together encode 2^512 - 569
+{ uint32_t _; uint8_t x162 = _subborrow_u32(x159, x111, 0x0, &_); // fold the addition carry x111 into the borrow chain; difference discarded, x162 is the final borrow
+{ uint32_t x163 = cmovznz(x162, x158, x110); // per-limb select between trial difference and raw sum, keyed on x162; presumably cmovznz(c, a, b) yields a when c == 0 else b (liblow.h, not shown)
+{ uint32_t x164 = cmovznz(x162, x155, x107);
+{ uint32_t x165 = cmovznz(x162, x152, x104);
+{ uint32_t x166 = cmovznz(x162, x149, x101);
+{ uint32_t x167 = cmovznz(x162, x146, x98);
+{ uint32_t x168 = cmovznz(x162, x143, x95);
+{ uint32_t x169 = cmovznz(x162, x140, x92);
+{ uint32_t x170 = cmovznz(x162, x137, x89);
+{ uint32_t x171 = cmovznz(x162, x134, x86);
+{ uint32_t x172 = cmovznz(x162, x131, x83);
+{ uint32_t x173 = cmovznz(x162, x128, x80);
+{ uint32_t x174 = cmovznz(x162, x125, x77);
+{ uint32_t x175 = cmovznz(x162, x122, x74);
+{ uint32_t x176 = cmovznz(x162, x119, x71);
+{ uint32_t x177 = cmovznz(x162, x116, x68);
+{ uint32_t x178 = cmovznz(x162, x113, x65);
+out[0] = x163; // out[] is filled highest limb first: x163 derives from the top sum limb x110
+out[1] = x164;
+out[2] = x165;
+out[3] = x166;
+out[4] = x167;
+out[5] = x168;
+out[6] = x169;
+out[7] = x170;
+out[8] = x171;
+out[9] = x172;
+out[10] = x173;
+out[11] = x174;
+out[12] = x175;
+out[13] = x176;
+out[14] = x177;
+out[15] = x178; // lowest limb (derives from x65)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/montgomery32_2e512m569/feadd.h b/src/Specific/montgomery32_2e512m569/feadd.h
new file mode 100644
index 000000000..b80152c31
--- /dev/null
+++ b/src/Specific/montgomery32_2e512m569/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // feadd prototype: `out` (16 result limbs per feadd.c) plus two 16-limb operands passed as 32 scalars, highest limb first within each operand
diff --git a/src/Specific/montgomery32_2e512m569/feaddDisplay.log b/src/Specific/montgomery32_2e512m569/feaddDisplay.log
index e4f8a3767..df85b18ee 100644
--- a/src/Specific/montgomery32_2e512m569/feaddDisplay.log
+++ b/src/Specific/montgomery32_2e512m569/feaddDisplay.log
@@ -18,7 +18,7 @@ Interp-η
uint32_t x104, uint8_t x105 = addcarryx_u32(x102, x31, x61);
uint32_t x107, uint8_t x108 = addcarryx_u32(x105, x33, x63);
uint32_t x110, uint8_t x111 = addcarryx_u32(x108, x32, x62);
- uint32_t x113, uint8_t x114 = subborrow_u32(0x0, x65, Const 4294966727);
+ uint32_t x113, uint8_t x114 = subborrow_u32(0x0, x65, 0xfffffdc7);
uint32_t x116, uint8_t x117 = subborrow_u32(x114, x68, 0xffffffff);
uint32_t x119, uint8_t x120 = subborrow_u32(x117, x71, 0xffffffff);
uint32_t x122, uint8_t x123 = subborrow_u32(x120, x74, 0xffffffff);
diff --git a/src/Specific/montgomery32_2e512m569/fenz.c b/src/Specific/montgomery32_2e512m569/fenz.c
new file mode 100644
index 000000000..9290e38c5
--- /dev/null
+++ b/src/Specific/montgomery32_2e512m569/fenz.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t x31 = (x30 | x29); // fenz: OR-fold all 16 arguments into one word; every step narrows the uint64_t OR to uint32_t
+{ uint32_t x32 = (x28 | x31); // NOTE(review): bits above bit 31 of any argument are dropped by the narrowing - presumably each argument is a 32-bit limb; confirm
+{ uint32_t x33 = (x26 | x32);
+{ uint32_t x34 = (x24 | x33);
+{ uint32_t x35 = (x22 | x34);
+{ uint32_t x36 = (x20 | x35);
+{ uint32_t x37 = (x18 | x36);
+{ uint32_t x38 = (x16 | x37);
+{ uint32_t x39 = (x14 | x38);
+{ uint32_t x40 = (x12 | x39);
+{ uint32_t x41 = (x10 | x40);
+{ uint32_t x42 = (x8 | x41);
+{ uint32_t x43 = (x6 | x42);
+{ uint32_t x44 = (x4 | x43);
+{ uint32_t x45 = (x2 | x44); // x45 == 0 exactly when the low 32 bits of every argument are 0
+out[0] = x45; // only out[0] is written; the 32-bit value is zero-extended to uint64_t
+}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e512m569/fenz.h b/src/Specific/montgomery32_2e512m569/fenz.h
new file mode 100644
index 000000000..941e49edd
--- /dev/null
+++ b/src/Specific/montgomery32_2e512m569/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz prototype: `out` plus 16 limb arguments; the definition in fenz.c stores their combined OR in out[0]
diff --git a/src/Specific/montgomery32_2e521m1/fenz.c b/src/Specific/montgomery32_2e521m1/fenz.c
new file mode 100644
index 000000000..50ef7bf8b
--- /dev/null
+++ b/src/Specific/montgomery32_2e521m1/fenz.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x31, uint64_t x32, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint32_t x33 = (x32 | x31); // fenz: OR-fold all 17 arguments into one word; every step narrows the uint64_t OR to uint32_t
+{ uint32_t x34 = (x30 | x33); // NOTE(review): bits above bit 31 of any argument are dropped by the narrowing - presumably each argument is a 32-bit limb; confirm
+{ uint32_t x35 = (x28 | x34);
+{ uint32_t x36 = (x26 | x35);
+{ uint32_t x37 = (x24 | x36);
+{ uint32_t x38 = (x22 | x37);
+{ uint32_t x39 = (x20 | x38);
+{ uint32_t x40 = (x18 | x39);
+{ uint32_t x41 = (x16 | x40);
+{ uint32_t x42 = (x14 | x41);
+{ uint32_t x43 = (x12 | x42);
+{ uint32_t x44 = (x10 | x43);
+{ uint32_t x45 = (x8 | x44);
+{ uint32_t x46 = (x6 | x45);
+{ uint32_t x47 = (x4 | x46);
+{ uint32_t x48 = (x2 | x47); // x48 == 0 exactly when the low 32 bits of every argument are 0
+out[0] = x48; // only out[0] is written; the 32-bit value is zero-extended to uint64_t
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery32_2e521m1/fenz.h b/src/Specific/montgomery32_2e521m1/fenz.h
new file mode 100644
index 000000000..d53f64ce9
--- /dev/null
+++ b/src/Specific/montgomery32_2e521m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x31, uint64_t x32, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // fenz prototype: `out` plus 17 limb arguments; the definition in fenz.c stores their combined OR in out[0]
diff --git a/src/Specific/montgomery64_2e127m1/feadd.c b/src/Specific/montgomery64_2e127m1/feadd.c
new file mode 100644
index 000000000..f2b1d1573
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/feadd.c
@@ -0,0 +1,30 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
+{ uint64_t x9; uint8_t x10 = _addcarryx_u64(0x0, x5, x7, &x9); // feadd: two-word add (x4:x5) + (x6:x7); low words x5, x7 first
+{ uint64_t x12; uint8_t x13 = _addcarryx_u64(x10, x4, x6, &x12); // high words plus carry; x13 is the carry out of the 128-bit sum
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(0x0, x9, 0xffffffffffffffffL, &x15); // trial subtraction of the constant 0x7fffffffffffffff:0xffffffffffffffff = 2^127 - 1, low word first
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, x12, 0x7fffffffffffffffL, &x18);
+{ uint64_t _; uint8_t x22 = _subborrow_u64(x19, x13, 0x0, &_); // fold the addition carry x13 into the borrow chain; difference discarded, x22 is the final borrow
+{ uint64_t x23 = cmovznz(x22, x18, x12); // select trial difference or raw sum per word, keyed on x22; presumably cmovznz(c, a, b) yields a when c == 0 else b (liblow.h, not shown)
+{ uint64_t x24 = cmovznz(x22, x15, x9);
+out[0] = x23; // high word
+out[1] = x24; // low word
+}}}}}}}
+// caller: uint64_t out[2];
diff --git a/src/Specific/montgomery64_2e127m1/feadd.h b/src/Specific/montgomery64_2e127m1/feadd.h
new file mode 100644
index 000000000..843a45172
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7); // feadd prototype: operands (x4:x5) and (x6:x7), high word first; feadd.c writes out[0] (high) and out[1] (low)
diff --git a/src/Specific/montgomery64_2e127m1/femul.c b/src/Specific/montgomery64_2e127m1/femul.c
new file mode 100644
index 000000000..3f5813e56
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/femul.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
+{ uint64_t x10; uint64_t x9 = _mulx_u64(x5, x7, &x10); // femul, row 1: x5 * (x6:x7); _mulx_u64 returns the low product word and stores the high word through the pointer
+{ uint64_t x13; uint64_t x12 = _mulx_u64(x5, x6, &x13);
+{ uint64_t x15; uint8_t x16 = _addcarryx_u64(0x0, x10, x12, &x15); // x18:x15:x9 becomes the three-word product x5 * (x6:x7)
+{ uint64_t x18; uint8_t _ = _addcarryx_u64(0x0, x16, x13, &x18); // the previous carry x16 is passed as an addend (carry-in is the literal 0); carry-out discarded into _
+{ uint64_t x22; uint64_t x21 = _mulx_u64(x9, 0xffffffffffffffffL, &x22); // x9 times the constant 0x7fffffffffffffff:0xffffffffffffffff (= 2^127 - 1) -> x30:x27:x21
+{ uint64_t x25; uint64_t x24 = _mulx_u64(x9, 0x7fffffffffffffffL, &x25);
+{ uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x22, x24, &x27);
+{ uint64_t x30; uint8_t _ = _addcarryx_u64(0x0, x28, x25, &x30);
+{ uint64_t _; uint8_t x34 = _addcarryx_u64(0x0, x9, x21, &_); // sum word discarded, only the carry x34 is kept; NOTE(review): looks like a Montgomery reduction step (directory is montgomery64_2e127m1) - confirm
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x15, x27, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x18, x30, &x39);
+{ uint64_t x43; uint64_t x42 = _mulx_u64(x4, x7, &x43); // row 2: x4 * (x6:x7) -> x51:x48:x42
+{ uint64_t x46; uint64_t x45 = _mulx_u64(x4, x6, &x46);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(0x0, x43, x45, &x48);
+{ uint64_t x51; uint8_t _ = _addcarryx_u64(0x0, x49, x46, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(0x0, x36, x42, &x54); // accumulate row 2 onto the running words x36, x39 and the carry x40
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x55, x39, x48, &x57);
+{ uint64_t x60; uint8_t x61 = _addcarryx_u64(x58, x40, x51, &x60);
+{ uint64_t x64; uint64_t x63 = _mulx_u64(x54, 0xffffffffffffffffL, &x64); // second round, same shape as the first: x54 * (2^127 - 1) -> x72:x69:x63
+{ uint64_t x67; uint64_t x66 = _mulx_u64(x54, 0x7fffffffffffffffL, &x67);
+{ uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x64, x66, &x69);
+{ uint64_t x72; uint8_t _ = _addcarryx_u64(0x0, x70, x67, &x72);
+{ uint64_t _; uint8_t x76 = _addcarryx_u64(0x0, x54, x63, &_); // again only the carry of the lowest word is kept
+{ uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x57, x69, &x78);
+{ uint64_t x81; uint8_t x82 = _addcarryx_u64(x79, x60, x72, &x81);
+{ uint8_t x83 = (x82 + x61); // combine the two top-level carries in uint8_t
+{ uint64_t x85; uint8_t x86 = _subborrow_u64(0x0, x78, 0xffffffffffffffffL, &x85); // trial subtraction of 2^127 - 1 from x83:x81:x78; x92 is the final borrow
+{ uint64_t x88; uint8_t x89 = _subborrow_u64(x86, x81, 0x7fffffffffffffffL, &x88);
+{ uint64_t _; uint8_t x92 = _subborrow_u64(x89, x83, 0x0, &_);
+{ uint64_t x93 = cmovznz(x92, x88, x81); // select trial difference or unreduced word, keyed on x92; cmovznz comes from liblow.h (not shown), presumably picks the 2nd argument when x92 == 0
+{ uint64_t x94 = cmovznz(x92, x85, x78);
+out[0] = x93; // high word
+out[1] = x94; // low word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[2];
diff --git a/src/Specific/montgomery64_2e127m1/femul.h b/src/Specific/montgomery64_2e127m1/femul.h
new file mode 100644
index 000000000..9073f5a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7); // femul prototype: operands (x4:x5) and (x6:x7), high word first; femul.c writes out[0] (high) and out[1] (low)
diff --git a/src/Specific/montgomery64_2e127m1/fenz.c b/src/Specific/montgomery64_2e127m1/fenz.c
new file mode 100644
index 000000000..a5550fc28
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/fenz.c
@@ -0,0 +1,23 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x1, uint64_t x2)
+{ uint64_t x3 = (x2 | x1); // fenz: x3 is zero exactly when both input words are zero
+out[0] = x3; // only out[0] is written
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e127m1/fenz.h b/src/Specific/montgomery64_2e127m1/fenz.h
new file mode 100644
index 000000000..301bd482f
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x1, uint64_t x2); // fenz prototype: the definition in fenz.c stores x1 | x2 in out[0]
diff --git a/src/Specific/montgomery64_2e127m1/feopp.c b/src/Specific/montgomery64_2e127m1/feopp.c
new file mode 100644
index 000000000..94cb256e1
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/feopp.c
@@ -0,0 +1,30 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x1, uint64_t x2)
+{ uint64_t x4; uint8_t x5 = _subborrow_u64(0x0, 0x0, x2, &x4); // feopp: compute 0 - (x1:x2) word by word; x2 is the low word
+{ uint64_t x7; uint8_t x8 = _subborrow_u64(x5, 0x0, x1, &x7); // x8 is the final borrow of the two-word subtraction
+{ uint64_t x9 = (uint64_t)cmovznz(x8, 0x0, 0xffffffffffffffffL); // mask built from the borrow: presumably 0 when x8 == 0 and all-ones otherwise (cmovznz is defined in liblow.h, not shown)
+{ uint64_t x10 = (x9 & 0xffffffffffffffffL); // masked low word of the constant 2^127 - 1
+{ uint64_t x12; uint8_t x13 = _addcarryx_u64(0x0, x4, x10, &x12); // add the masked constant back onto the low difference
+{ uint64_t x14 = (x9 & 0x7fffffffffffffffL); // masked high word of the constant 2^127 - 1
+{ uint64_t x16; uint8_t _ = _addcarryx_u64(x13, x7, x14, &x16); // high word plus carry x13; the final carry-out is discarded
+out[0] = x16; // high word
+out[1] = x12; // low word
+}}}}}}}
+// caller: uint64_t out[2];
diff --git a/src/Specific/montgomery64_2e127m1/feopp.h b/src/Specific/montgomery64_2e127m1/feopp.h
new file mode 100644
index 000000000..aecb5c52c
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x1, uint64_t x2); // feopp prototype: operand (x1:x2), high word first; feopp.c writes out[0] (high) and out[1] (low)
diff --git a/src/Specific/montgomery64_2e127m1/fesub.c b/src/Specific/montgomery64_2e127m1/fesub.c
new file mode 100644
index 000000000..10326337a
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/fesub.c
@@ -0,0 +1,30 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7)
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(0x0, x5, x7, &x9); // fesub: two-word subtract (x4:x5) - (x6:x7); low words x5, x7 first
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, x4, x6, &x12); // high words minus borrow; x13 is the final borrow
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask built from the borrow: presumably 0 when x13 == 0 and all-ones otherwise (cmovznz is defined in liblow.h, not shown)
+{ uint64_t x15 = (x14 & 0xffffffffffffffffL); // masked low word of the constant 2^127 - 1
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x9, x15, &x17); // add the masked constant back onto the low difference
+{ uint64_t x19 = (x14 & 0x7fffffffffffffffL); // masked high word of the constant 2^127 - 1
+{ uint64_t x21; uint8_t _ = _addcarryx_u64(x18, x12, x19, &x21); // high word plus carry x18; the final carry-out is discarded
+out[0] = x21; // high word
+out[1] = x17; // low word
+}}}}}}}
+// caller: uint64_t out[2];
diff --git a/src/Specific/montgomery64_2e127m1/fesub.h b/src/Specific/montgomery64_2e127m1/fesub.h
new file mode 100644
index 000000000..ae932e6d4
--- /dev/null
+++ b/src/Specific/montgomery64_2e127m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7); // fesub prototype: computes from (x4:x5) minus (x6:x7), high word first; fesub.c writes out[0] (high) and out[1] (low)
diff --git a/src/Specific/montgomery64_2e129m25/feadd.c b/src/Specific/montgomery64_2e129m25/feadd.c
new file mode 100644
index 000000000..0d34b8eec
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // out = (x6:x7:x5) + (x10:x11:x9) on three 64-bit limbs, then a selected trial subtraction; x5 and x9 are the least-significant limbs
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low limbs, carry-in 0
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20 = carry out of the top limb
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe7L, &x22); // trial subtraction of 0x1:0xffffffffffffffff:0xffffffffffffffe7 (2^129 - 25), low limb first
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1, &x28);
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // borrow is run through the carry word x20; the difference is discarded, only x32 is kept
+{ uint64_t x33 = cmovznz(x32, x28, x19); // per-limb select on x32 between the difference and the raw sum; selection sense is defined by cmovznz in liblow.h -- confirm there
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // most-significant limb
+out[1] = x34;
+out[2] = x35; // least-significant limb
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e129m25/feadd.h b/src/Specific/montgomery64_2e129m25/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e129m25/femul.c b/src/Specific/montgomery64_2e129m25/femul.c
new file mode 100644
index 000000000..43dc51d01
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/femul.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // word-by-word product of (x6:x7:x5) and (x10:x11:x9), interleaved with reduction by 0x1:0xffffffffffffffff:0xffffffffffffffe7; x5 and x9 are the least-significant limbs
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // row 1: x5 * (x10:x11:x9); _mulx_u64 returns the low half and stores the high half through the pointer
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // fold each high half into the next-higher low half
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // top word of the row: carry + x20; carry out discarded
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_); // reduction multiplier: low 64 bits of x13 * 0x8f5c28f5c28f5c29 (high half discarded); `_` is redeclared in a nested scope
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35); // x31 * low modulus limb
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38); // x31 * middle modulus limb
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(0x0, x35, x37, &x40);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x38, x31, &x43); // top modulus limb is 0x1, so x31 itself is added instead of a product
+{ uint64_t _; uint8_t x47 = _addcarryx_u64(0x0, x13, x34, &_); // low word of the sum is discarded; only its carry x47 is used
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x22, x40, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x25, x43, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x28, x44, &x55);
+{ uint64_t x59; uint64_t x58 = _mulx_u64(x7, x9, &x59); // row 2: x7 * (x10:x11:x9)
+{ uint64_t x62; uint64_t x61 = _mulx_u64(x7, x11, &x62);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x10, &x65);
+{ uint64_t x67; uint8_t x68 = _addcarryx_u64(0x0, x59, x61, &x67);
+{ uint64_t x70; uint8_t x71 = _addcarryx_u64(x68, x62, x64, &x70);
+{ uint64_t x73; uint8_t _ = _addcarryx_u64(0x0, x71, x65, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(0x0, x49, x58, &x76); // accumulate row 2 onto the result of round 1
+{ uint64_t x79; uint8_t x80 = _addcarryx_u64(x77, x52, x67, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x55, x70, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x56, x73, &x85);
+{ uint64_t _; uint64_t x88 = _mulx_u64(x76, 0x8f5c28f5c28f5c29L, &_); // reduction multiplier for round 2
+{ uint64_t x92; uint64_t x91 = _mulx_u64(x88, 0xffffffffffffffe7L, &x92);
+{ uint64_t x95; uint64_t x94 = _mulx_u64(x88, 0xffffffffffffffffL, &x95);
+{ uint64_t x97; uint8_t x98 = _addcarryx_u64(0x0, x92, x94, &x97);
+{ uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x95, x88, &x100);
+{ uint64_t _; uint8_t x104 = _addcarryx_u64(0x0, x76, x91, &_);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x79, x97, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x82, x100, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x85, x101, &x112);
+{ uint8_t x114 = (x113 + x86); // merge the two top-word carries
+{ uint64_t x117; uint64_t x116 = _mulx_u64(x6, x9, &x117); // row 3: x6 * (x10:x11:x9)
+{ uint64_t x120; uint64_t x119 = _mulx_u64(x6, x11, &x120);
+{ uint64_t x123; uint64_t x122 = _mulx_u64(x6, x10, &x123);
+{ uint64_t x125; uint8_t x126 = _addcarryx_u64(0x0, x117, x119, &x125);
+{ uint64_t x128; uint8_t x129 = _addcarryx_u64(x126, x120, x122, &x128);
+{ uint64_t x131; uint8_t _ = _addcarryx_u64(0x0, x129, x123, &x131);
+{ uint64_t x134; uint8_t x135 = _addcarryx_u64(0x0, x106, x116, &x134);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x109, x125, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x112, x128, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x114, x131, &x143);
+{ uint64_t _; uint64_t x146 = _mulx_u64(x134, 0x8f5c28f5c28f5c29L, &_); // reduction multiplier for round 3
+{ uint64_t x150; uint64_t x149 = _mulx_u64(x146, 0xffffffffffffffe7L, &x150);
+{ uint64_t x153; uint64_t x152 = _mulx_u64(x146, 0xffffffffffffffffL, &x153);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(0x0, x150, x152, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x153, x146, &x158);
+{ uint64_t _; uint8_t x162 = _addcarryx_u64(0x0, x134, x149, &_);
+{ uint64_t x164; uint8_t x165 = _addcarryx_u64(x162, x137, x155, &x164);
+{ uint64_t x167; uint8_t x168 = _addcarryx_u64(x165, x140, x158, &x167);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(x168, x143, x159, &x170);
+{ uint8_t x172 = (x171 + x144);
+{ uint64_t x174; uint8_t x175 = _subborrow_u64(0x0, x164, 0xffffffffffffffe7L, &x174); // trial subtraction of the modulus, low limb first
+{ uint64_t x177; uint8_t x178 = _subborrow_u64(x175, x167, 0xffffffffffffffffL, &x177);
+{ uint64_t x180; uint8_t x181 = _subborrow_u64(x178, x170, 0x1, &x180);
+{ uint64_t _; uint8_t x184 = _subborrow_u64(x181, x172, 0x0, &_); // borrow is run through the carry word x172; only the borrow x184 is used
+{ uint64_t x185 = cmovznz(x184, x180, x170); // per-limb select on x184 between the difference and the unsubtracted value; selection sense is defined by cmovznz in liblow.h -- confirm there
+{ uint64_t x186 = cmovznz(x184, x177, x167);
+{ uint64_t x187 = cmovznz(x184, x174, x164);
+out[0] = x185; // most-significant limb
+out[1] = x186;
+out[2] = x187; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e129m25/femul.h b/src/Specific/montgomery64_2e129m25/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e129m25/fenz.c b/src/Specific/montgomery64_2e129m25/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // writes the bitwise OR of the three limbs to out[0]; the result is zero exactly when x2, x3 and x4 are all zero
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5);
+out[0] = x6; // only one output word is written
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e129m25/fenz.h b/src/Specific/montgomery64_2e129m25/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e129m25/feopp.c b/src/Specific/montgomery64_2e129m25/feopp.c
new file mode 100644
index 000000000..3d73f8511
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // out = 0 - (x3:x4:x2) on three 64-bit limbs, followed by a masked add-back; x2 is the least-significant limb
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // x6 = 0 - x2, x7 = borrow out
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13 = borrow out of the top limb
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask derived from x13; presumably all-ones iff x13 != 0 -- confirm against cmovznz in liblow.h
+{ uint64_t x15 = (x14 & 0xffffffffffffffe7L); // masked low limb of 0x1:0xffffffffffffffff:0xffffffffffffffe7 (2^129 - 25)
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint8_t x23 = ((uint8_t)x14 & 0x1); // masked top limb (0x1), narrowed to 8 bits
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // final carry is discarded in `_`
+out[0] = x25; // most-significant limb
+out[1] = x21;
+out[2] = x17; // least-significant limb
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e129m25/feopp.h b/src/Specific/montgomery64_2e129m25/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e129m25/fesub.c b/src/Specific/montgomery64_2e129m25/fesub.c
new file mode 100644
index 000000000..f91af8c17
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // out = (x6:x7:x5) - (x10:x11:x9) on three 64-bit limbs, followed by a masked add-back; x5 and x9 are the least-significant limbs
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low limbs: x13 = x5 - x9, x14 = borrow out
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20 = borrow out of the whole subtraction
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask derived from x20; presumably all-ones iff x20 != 0 -- confirm against cmovznz in liblow.h
+{ uint64_t x22 = (x21 & 0xffffffffffffffe7L); // masked low limb of 0x1:0xffffffffffffffff:0xffffffffffffffe7 (2^129 - 25)
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint8_t x30 = ((uint8_t)x21 & 0x1); // masked top limb (0x1), narrowed to 8 bits
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // final carry is discarded in `_`
+out[0] = x32; // most-significant limb
+out[1] = x28;
+out[2] = x24; // least-significant limb
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e129m25/fesub.h b/src/Specific/montgomery64_2e129m25/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e129m25/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e130m5/feadd.c b/src/Specific/montgomery64_2e130m5/feadd.c
new file mode 100644
index 000000000..662a2bda8
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // out = (x6:x7:x5) + (x10:x11:x9) on three 64-bit limbs, then a selected trial subtraction; x5 and x9 are the least-significant limbs
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low limbs, carry-in 0
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20 = carry out of the top limb
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22); // trial subtraction of 0x3:0xffffffffffffffff:0xfffffffffffffffb (2^130 - 5), low limb first
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3, &x28);
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // borrow is run through the carry word x20; the difference is discarded, only x32 is kept
+{ uint64_t x33 = cmovznz(x32, x28, x19); // per-limb select on x32 between the difference and the raw sum; selection sense is defined by cmovznz in liblow.h -- confirm there
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // most-significant limb
+out[1] = x34;
+out[2] = x35; // least-significant limb
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e130m5/feadd.h b/src/Specific/montgomery64_2e130m5/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e130m5/femul.c b/src/Specific/montgomery64_2e130m5/femul.c
new file mode 100644
index 000000000..846d29d24
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/femul.c
@@ -0,0 +1,102 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// Montgomery multiplication on three 64-bit limbs for the modulus
+// 0x3:0xffffffffffffffff:0xfffffffffffffffb (2^130 - 5).
+// Operands are (x6:x7:x5) and (x10:x11:x9), most-significant limb first, so x5
+// and x9 are the least-significant limbs. out[0] receives the most-significant
+// result limb and out[2] the least-significant one.
+// NOTE(review): the checked-in body stopped after the first nine statements and
+// ended in an unprinted "Op Syntax.MulSplit ..." term where x31 * 0x3 belongs, so
+// it did not compile. The statements below complete it with the same schedule as
+// montgomery64_2e137m13/femul.c, substituting this prime's constants. Regenerate
+// from the pipeline and diff once it can print that MulSplit.
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // row 1: x5 * (x10:x11:x9); low half returned, high half stored
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22);
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_); // reduction multiplier; 0xcccccccccccccccd * 5 == 1 (mod 2^64)
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35); // x31 * low modulus limb
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38); // x31 * middle modulus limb
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3, &x41); // x31 * top modulus limb (the product the generator failed to print)
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // low word cancels to zero mod 2^64; only the carry is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // row 2: x7 * (x10:x11:x9)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xcccccccccccccccdL, &_); // reduction multiplier for round 2
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffbL, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // merge the two top-word carries
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // row 3: x6 * (x10:x11:x9)
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xcccccccccccccccdL, &_); // reduction multiplier for round 3
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffbL, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156);
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffbL, &x192); // trial subtraction of the modulus, low limb first
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // only the final borrow x202 is used
+{ uint64_t x203 = cmovznz(x202, x198, x188); // same selection pattern as the sibling femul.c files; cmovznz is defined in liblow.h
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203;
+out[1] = x204;
+out[2] = x205;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e130m5/femul.h b/src/Specific/montgomery64_2e130m5/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e130m5/fenz.c b/src/Specific/montgomery64_2e130m5/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // writes the bitwise OR of the three limbs to out[0]; the result is zero exactly when x2, x3 and x4 are all zero
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5);
+out[0] = x6; // only one output word is written
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e130m5/fenz.h b/src/Specific/montgomery64_2e130m5/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e130m5/feopp.c b/src/Specific/montgomery64_2e130m5/feopp.c
new file mode 100644
index 000000000..1a52fabdb
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // out = 0 - (x3:x4:x2) on three 64-bit limbs, followed by a masked add-back; x2 is the least-significant limb
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // x6 = 0 - x2, x7 = borrow out
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13 = borrow out of the top limb
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask derived from x13; presumably all-ones iff x13 != 0 -- confirm against cmovznz in liblow.h
+{ uint64_t x15 = (x14 & 0xfffffffffffffffbL); // masked low limb of 0x3:0xffffffffffffffff:0xfffffffffffffffb (2^130 - 5)
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint8_t x23 = ((uint8_t)x14 & 0x3); // masked top limb (0x3), narrowed to 8 bits
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // final carry is discarded in `_`
+out[0] = x25; // most-significant limb
+out[1] = x21;
+out[2] = x17; // least-significant limb
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e130m5/feopp.h b/src/Specific/montgomery64_2e130m5/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e130m5/fesub.c b/src/Specific/montgomery64_2e130m5/fesub.c
new file mode 100644
index 000000000..6443fdde3
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // out = (x6:x7:x5) - (x10:x11:x9) on three 64-bit limbs, followed by a masked add-back; x5 and x9 are the least-significant limbs
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low limbs: x13 = x5 - x9, x14 = borrow out
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20 = borrow out of the whole subtraction
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask derived from x20; presumably all-ones iff x20 != 0 -- confirm against cmovznz in liblow.h
+{ uint64_t x22 = (x21 & 0xfffffffffffffffbL); // masked low limb of 0x3:0xffffffffffffffff:0xfffffffffffffffb (2^130 - 5)
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint8_t x30 = ((uint8_t)x21 & 0x3); // masked top limb (0x3), narrowed to 8 bits
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // final carry is discarded in `_`
+out[0] = x32; // most-significant limb
+out[1] = x28;
+out[2] = x24; // least-significant limb
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e130m5/fesub.h b/src/Specific/montgomery64_2e130m5/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e130m5/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e137m13/feadd.c b/src/Specific/montgomery64_2e137m13/feadd.c
new file mode 100644
index 000000000..09c3c25a0
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // out = (x6:x7:x5) + (x10:x11:x9) on three 64-bit limbs, then a selected trial subtraction; x5 and x9 are the least-significant limbs
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low limbs, carry-in 0
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20 = carry out of the top limb
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff3L, &x22); // trial subtraction of 0x1ff:0xffffffffffffffff:0xfffffffffffffff3 (2^137 - 13), low limb first
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1ff, &x28);
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // borrow is run through the carry word x20; the difference is discarded, only x32 is kept
+{ uint64_t x33 = cmovznz(x32, x28, x19); // per-limb select on x32 between the difference and the raw sum; selection sense is defined by cmovznz in liblow.h -- confirm there
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // most-significant limb
+out[1] = x34;
+out[2] = x35; // least-significant limb
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e137m13/feadd.h b/src/Specific/montgomery64_2e137m13/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e137m13/femul.c b/src/Specific/montgomery64_2e137m13/femul.c
new file mode 100644
index 000000000..4acf26b69
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // word-by-word product of (x6:x7:x5) and (x10:x11:x9), interleaved with reduction by 0x1ff:0xffffffffffffffff:0xfffffffffffffff3; x5 and x9 are the least-significant limbs
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // row 1: x5 * (x10:x11:x9); _mulx_u64 returns the low half and stores the high half through the pointer
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // fold each high half into the next-higher low half
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // top word of the row: carry + x20; carry out discarded
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x4ec4ec4ec4ec4ec5, &_); // reduction multiplier: low 64 bits of x13 * 0x4ec4ec4ec4ec4ec5 (high half discarded); `_` is redeclared in a nested scope
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff3L, &x35); // x31 * low modulus limb
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38); // x31 * middle modulus limb
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1ff, &x41); // x31 * top modulus limb
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // low word of the sum is discarded; only its carry x53 is used
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // row 2: x7 * (x10:x11:x9)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate row 2 onto the result of round 1
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x4ec4ec4ec4ec4ec5, &_); // reduction multiplier for round 2
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff3L, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1ff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // merge the two top-word carries
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // row 3: x6 * (x10:x11:x9)
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x4ec4ec4ec4ec4ec5, &_); // reduction multiplier for round 3
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff3L, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1ff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156);
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff3L, &x192); // trial subtraction of the modulus, low limb first
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1ff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // borrow is run through the carry word x190; only the borrow x202 is used
+{ uint64_t x203 = cmovznz(x202, x198, x188); // per-limb select on x202 between the difference and the unsubtracted value; selection sense is defined by cmovznz in liblow.h -- confirm there
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // most-significant limb
+out[1] = x204;
+out[2] = x205; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e137m13/femul.h b/src/Specific/montgomery64_2e137m13/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e137m13/fenz.c b/src/Specific/montgomery64_2e137m13/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // writes the bitwise OR of the three limbs to out[0]; the result is zero exactly when x2, x3 and x4 are all zero
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5);
+out[0] = x6; // only one output word is written
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e137m13/fenz.h b/src/Specific/montgomery64_2e137m13/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e137m13/feopp.c b/src/Specific/montgomery64_2e137m13/feopp.c
new file mode 100644
index 000000000..3639e9a08
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x3:x4:x2) over three 64-bit words, then adds back the masked limbs of 2^137 - 13.
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // borrow chain starts at x2, so x2 is the least-significant word
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13: borrow out of the top word (set when the 192-bit input is nonzero)
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; cmovznz comes from liblow.h - presumably 0 when x13 == 0 and all-ones otherwise (confirm)
+{ uint64_t x15 = (x14 & 0xfffffffffffffff3L); // masked low limb, 2^64 - 13
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0x1ff); // masked top limb; 0x1ff : ff..ff : ff..f3 equals 2^137 - 13
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the top word is discarded
+out[0] = x25; // most-significant word
+out[1] = x21;
+out[2] = x17; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e137m13/feopp.h b/src/Specific/montgomery64_2e137m13/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype for the definition in this directory's feopp.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e137m13/fesub.c b/src/Specific/montgomery64_2e137m13/fesub.c
new file mode 100644
index 000000000..d9b7e5783
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Subtraction: (x6:x7:x5) - (x10:x11:x9) over three 64-bit words, then adds back the masked limbs of 2^137 - 13.
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // borrow chain starts with x5/x9: the least-significant words
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20: borrow out of the top word (first operand smaller than second)
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; cmovznz comes from liblow.h - presumably 0 when x20 == 0 and all-ones otherwise (confirm)
+{ uint64_t x22 = (x21 & 0xfffffffffffffff3L); // masked low limb, 2^64 - 13
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0x1ff); // masked top limb; 0x1ff : ff..ff : ff..f3 equals 2^137 - 13
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the top word is discarded
+out[0] = x32; // most-significant word
+out[1] = x28;
+out[2] = x24; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e137m13/fesub.h b/src/Specific/montgomery64_2e137m13/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e137m13/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for the definition in this directory's fesub.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e140m27/feadd.c b/src/Specific/montgomery64_2e140m27/feadd.c
new file mode 100644
index 000000000..492af78a0
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Addition: (x6:x7:x5) + (x10:x11:x9) over three 64-bit words, followed by a trial subtraction of 2^140 - 27.
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // carry chain starts with x5/x9: the least-significant words
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20: carry out of the top word
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe5L, &x22); // trial subtraction, low limb 2^64 - 27 first
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0xfff, &x28); // top limb; 0xfff : ff..ff : ff..e5 equals 2^140 - 27
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // folds the addition carry x20 into the borrow chain; x32 is the overall borrow
+{ uint64_t x33 = cmovznz(x32, x28, x19); // cmovznz comes from liblow.h - presumably yields x28 when x32 == 0 and x19 otherwise (confirm)
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // most-significant word
+out[1] = x34;
+out[2] = x35; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e140m27/feadd.h b/src/Specific/montgomery64_2e140m27/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for the definition in this directory's feadd.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e140m27/femul.c b/src/Specific/montgomery64_2e140m27/femul.c
new file mode 100644
index 000000000..335361606
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies (x6:x7:x5) by (x10:x11:x9) in three multiply-then-reduce rounds against the limbs of 2^140 - 27; writes out[0..2].
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 times each word of the second operand (x13 low half, x14 high half)
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // merge high/low halves of adjacent partial products into x22, x25, x28
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x84bda12f684bda13L, &_); // x31: low 64 bits of x13 * k, where k * 0xffffffffffffffe5 == -1 (mod 2^64)
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe5L, &x35); // x31 times the modulus limbs 0xfff : ff..ff : ff..e5
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0xfff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // low word is 0 mod 2^64 by the choice of x31; only its carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2: x7 times each word of the second operand
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate onto the words left by round 1
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x84bda12f684bda13L, &_); // reduction factor for round 2, same constant k
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe5L, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0xfff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // low word cancels again; keep only the carry x116
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // sum of the two top carries of this round
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3: x6 times each word of the second operand
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate onto the words left by round 2
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x84bda12f684bda13L, &_); // reduction factor for round 3, same constant k
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe5L, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0xfff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_); // low word cancels again; keep only the carry x180
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // sum of the two top carries of the last round
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe5L, &x192); // trial subtraction of the modulus from x190 : x188 : x185 : x182
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0xfff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // x202: overall borrow of the trial subtraction
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz comes from liblow.h - presumably yields x198 when x202 == 0 and x188 otherwise (confirm)
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // most-significant word
+out[1] = x204;
+out[2] = x205; // least-significant word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e140m27/femul.h b/src/Specific/montgomery64_2e140m27/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for the definition in this directory's femul.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e140m27/fenz.c b/src/Specific/montgomery64_2e140m27/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Nonzero test: ORs the three input words together and stores the result in out[0].
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5); // x6 == 0 exactly when x2, x3 and x4 are all 0
+out[0] = x6; // only out[0] is written; the body contains no branch
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e140m27/fenz.h b/src/Specific/montgomery64_2e140m27/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype for the definition in this directory's fenz.c, which writes out[0] only.
diff --git a/src/Specific/montgomery64_2e140m27/feopp.c b/src/Specific/montgomery64_2e140m27/feopp.c
new file mode 100644
index 000000000..f9d52d171
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x3:x4:x2) over three 64-bit words, then adds back the masked limbs of 2^140 - 27.
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // borrow chain starts at x2, so x2 is the least-significant word
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13: borrow out of the top word (set when the 192-bit input is nonzero)
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; cmovznz comes from liblow.h - presumably 0 when x13 == 0 and all-ones otherwise (confirm)
+{ uint64_t x15 = (x14 & 0xffffffffffffffe5L); // masked low limb, 2^64 - 27
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0xfff); // masked top limb; 0xfff : ff..ff : ff..e5 equals 2^140 - 27
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the top word is discarded
+out[0] = x25; // most-significant word
+out[1] = x21;
+out[2] = x17; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e140m27/feopp.h b/src/Specific/montgomery64_2e140m27/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype for the definition in this directory's feopp.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e140m27/fesub.c b/src/Specific/montgomery64_2e140m27/fesub.c
new file mode 100644
index 000000000..2d144c7b4
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Subtraction: (x6:x7:x5) - (x10:x11:x9) over three 64-bit words, then adds back the masked limbs of 2^140 - 27.
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // borrow chain starts with x5/x9: the least-significant words
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20: borrow out of the top word (first operand smaller than second)
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; cmovznz comes from liblow.h - presumably 0 when x20 == 0 and all-ones otherwise (confirm)
+{ uint64_t x22 = (x21 & 0xffffffffffffffe5L); // masked low limb, 2^64 - 27
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0xfff); // masked top limb; 0xfff : ff..ff : ff..e5 equals 2^140 - 27
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the top word is discarded
+out[0] = x32; // most-significant word
+out[1] = x28;
+out[2] = x24; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e140m27/fesub.h b/src/Specific/montgomery64_2e140m27/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e140m27/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for the definition in this directory's fesub.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e141m9/feadd.c b/src/Specific/montgomery64_2e141m9/feadd.c
new file mode 100644
index 000000000..6d573b5b8
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Addition: (x6:x7:x5) + (x10:x11:x9) over three 64-bit words, followed by a trial subtraction of 2^141 - 9.
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // carry chain starts with x5/x9: the least-significant words
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20: carry out of the top word
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff7L, &x22); // trial subtraction, low limb 2^64 - 9 first
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1fff, &x28); // top limb; 0x1fff : ff..ff : ff..f7 equals 2^141 - 9
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // folds the addition carry x20 into the borrow chain; x32 is the overall borrow
+{ uint64_t x33 = cmovznz(x32, x28, x19); // cmovznz comes from liblow.h - presumably yields x28 when x32 == 0 and x19 otherwise (confirm)
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // most-significant word
+out[1] = x34;
+out[2] = x35; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e141m9/feadd.h b/src/Specific/montgomery64_2e141m9/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for the definition in this directory's feadd.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e141m9/femul.c b/src/Specific/montgomery64_2e141m9/femul.c
new file mode 100644
index 000000000..b93beb68a
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies (x6:x7:x5) by (x10:x11:x9) in three multiply-then-reduce rounds against the limbs of 2^141 - 9; writes out[0..2].
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 times each word of the second operand (x13 low half, x14 high half)
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // merge high/low halves of adjacent partial products into x22, x25, x28
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8e38e38e38e38e39L, &_); // x31: low 64 bits of x13 * k, where k * 0xfffffffffffffff7 == -1 (mod 2^64)
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff7L, &x35); // x31 times the modulus limbs 0x1fff : ff..ff : ff..f7
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // low word is 0 mod 2^64 by the choice of x31; only its carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2: x7 times each word of the second operand
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate onto the words left by round 1
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8e38e38e38e38e39L, &_); // reduction factor for round 2, same constant k
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff7L, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // low word cancels again; keep only the carry x116
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // sum of the two top carries of this round
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3: x6 times each word of the second operand
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate onto the words left by round 2
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8e38e38e38e38e39L, &_); // reduction factor for round 3, same constant k
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff7L, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_); // low word cancels again; keep only the carry x180
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // sum of the two top carries of the last round
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff7L, &x192); // trial subtraction of the modulus from x190 : x188 : x185 : x182
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // x202: overall borrow of the trial subtraction
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz comes from liblow.h - presumably yields x198 when x202 == 0 and x188 otherwise (confirm)
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // most-significant word
+out[1] = x204;
+out[2] = x205; // least-significant word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e141m9/femul.h b/src/Specific/montgomery64_2e141m9/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for the definition in this directory's femul.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e141m9/fenz.c b/src/Specific/montgomery64_2e141m9/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Nonzero test: ORs the three input words together and stores the result in out[0].
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5); // x6 == 0 exactly when x2, x3 and x4 are all 0
+out[0] = x6; // only out[0] is written; the body contains no branch
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e141m9/fenz.h b/src/Specific/montgomery64_2e141m9/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype for the definition in this directory's fenz.c, which writes out[0] only.
diff --git a/src/Specific/montgomery64_2e141m9/feopp.c b/src/Specific/montgomery64_2e141m9/feopp.c
new file mode 100644
index 000000000..29462c1fd
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x3:x4:x2) over three 64-bit words, then adds back the masked limbs of 2^141 - 9.
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // borrow chain starts at x2, so x2 is the least-significant word
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13: borrow out of the top word (set when the 192-bit input is nonzero)
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; cmovznz comes from liblow.h - presumably 0 when x13 == 0 and all-ones otherwise (confirm)
+{ uint64_t x15 = (x14 & 0xfffffffffffffff7L); // masked low limb, 2^64 - 9
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0x1fff); // masked top limb; 0x1fff : ff..ff : ff..f7 equals 2^141 - 9
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the top word is discarded
+out[0] = x25; // most-significant word
+out[1] = x21;
+out[2] = x17; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e141m9/feopp.h b/src/Specific/montgomery64_2e141m9/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype for the definition in this directory's feopp.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e141m9/fesub.c b/src/Specific/montgomery64_2e141m9/fesub.c
new file mode 100644
index 000000000..b414e11a1
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Subtraction: (x6:x7:x5) - (x10:x11:x9) over three 64-bit words, then adds back the masked limbs of 2^141 - 9.
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // borrow chain starts with x5/x9: the least-significant words
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20: borrow out of the top word (first operand smaller than second)
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; cmovznz comes from liblow.h - presumably 0 when x20 == 0 and all-ones otherwise (confirm)
+{ uint64_t x22 = (x21 & 0xfffffffffffffff7L); // masked low limb, 2^64 - 9
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle limb
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0x1fff); // masked top limb; 0x1fff : ff..ff : ff..f7 equals 2^141 - 9
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the top word is discarded
+out[0] = x32; // most-significant word
+out[1] = x28;
+out[2] = x24; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e141m9/fesub.h b/src/Specific/montgomery64_2e141m9/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e141m9/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for the definition in this directory's fesub.c, which writes out[0..2].
diff --git a/src/Specific/montgomery64_2e150m3/feadd.c b/src/Specific/montgomery64_2e150m3/feadd.c
new file mode 100644
index 000000000..51b20f325
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Addition: (x6:x7:x5) + (x10:x11:x9) over three 64-bit words, followed by a trial subtraction of 2^150 - 3.
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // carry chain starts with x5/x9: the least-significant words
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // x20: carry out of the top word
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffdL, &x22); // trial subtraction, low limb 2^64 - 3 first
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffff, &x28); // top limb; 0x3fffff : ff..ff : ff..fd equals 2^150 - 3
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // folds the addition carry x20 into the borrow chain; x32 is the overall borrow
+{ uint64_t x33 = cmovznz(x32, x28, x19); // cmovznz comes from liblow.h - presumably yields x28 when x32 == 0 and x19 otherwise (confirm)
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // most-significant word
+out[1] = x34;
+out[2] = x35; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e150m3/feadd.h b/src/Specific/montgomery64_2e150m3/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // prototype only: output pointer, then two operands passed as three 64-bit words each. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m3/femul.c b/src/Specific/montgomery64_2e150m3/femul.c
new file mode 100644
index 000000000..9dd6af2b7
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by femul below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // interleaved multiply-and-reduce of (x6:x7:x5) by (x10:x11:x9) against the constant 2^150 - 3: one row per word of the first operand, each followed by a word-wise reduction step
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // row 0: x5 (low word of the first operand) times x9, x11, x10 (low to high words of the second); _mulx_u64 returns the low half and stores the high half
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // combine overlapping high/low halves into the row's columns
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // top column = carry x26 + high half x20; the carry-out is bound to _ and never read
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor: low 64 bits of x13 * 0xaaaaaaaaaaaaaaab (3 * 0xaaaaaaaaaaaaaaab == 1 mod 2^64)
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffdL, &x35); // x31 times the constant words 0xff..fd, 0xff..ff, 0x3fffff (together 2^150 - 3)
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // lowest word of row + multiple is dropped (it is 0 mod 2^64 by the choice of x31); only carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55); // remaining columns become the running value, one word lower
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // row 1: x7 (middle word of the first operand) times the second operand
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate row 1 onto the running value x62:x61:x58:x55
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xaaaaaaaaaaaaaaabL, &_); // reduction step for row 1, same shape as for row 0
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffdL, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // merge the two top carries into one word
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // row 2: x6 (high word of the first operand) times the second operand
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate row 2 onto the running value x126:x124:x121:x118
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xaaaaaaaaaaaaaaabL, &_); // reduction step for row 2
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffdL, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // top carry word of the final running value
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffdL, &x192); // trial subtraction of 2^150 - 3 from x190:x188:x185:x182
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // only the final borrow x202 is kept
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz is declared in liblow.h; presumably yields the 2nd argument when x202 == 0 and the 3rd otherwise -- confirm there
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // high word
+out[1] = x204;
+out[2] = x205; // low word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e150m3/femul.h b/src/Specific/montgomery64_2e150m3/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // prototype only: output pointer, then two operands passed as three 64-bit words each. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m3/fenz.c b/src/Specific/montgomery64_2e150m3/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by fenz below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins (unused by fenz)
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // writes the bitwise OR of the three input words to out[0]
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5);
+out[0] = x6; // nonzero exactly when at least one input word is nonzero
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e150m3/fenz.h b/src/Specific/montgomery64_2e150m3/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // prototype only: output pointer, then one operand passed as three 64-bit words. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m3/feopp.c b/src/Specific/montgomery64_2e150m3/feopp.c
new file mode 100644
index 000000000..a560642cb
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by feopp below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // computes 0 - (x3:x4:x2), words listed most significant first, then adds back 2^150 - 3 under a mask derived from the borrow
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // 0 - low word
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13 is the final borrow
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; cmovznz is declared in liblow.h -- presumably 0 when x13 == 0 and all-ones otherwise, confirm there
+{ uint64_t x15 = (x14 & 0xfffffffffffffffdL); // masked low word of the constant 0x3fffff:0xff..ff:0xff..fd (= 2^150 - 3)
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle word
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0x3fffff); // masked high word
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // the last carry-out is bound to _ and never read
+out[0] = x25; // high word
+out[1] = x21;
+out[2] = x17; // low word
+}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e150m3/feopp.h b/src/Specific/montgomery64_2e150m3/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // prototype only: output pointer, then one operand passed as three 64-bit words. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m3/fesub.c b/src/Specific/montgomery64_2e150m3/fesub.c
new file mode 100644
index 000000000..843b2de7b
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by fesub below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // computes (x6:x7:x5) - (x10:x11:x9), words listed most significant first, then adds back 2^150 - 3 under a mask derived from the borrow
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low words, no borrow-in
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20 is the final borrow
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; cmovznz is declared in liblow.h -- presumably 0 when x20 == 0 and all-ones otherwise, confirm there
+{ uint64_t x22 = (x21 & 0xfffffffffffffffdL); // masked low word of the constant 0x3fffff:0xff..ff:0xff..fd (= 2^150 - 3)
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle word
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0x3fffff); // masked high word
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // the last carry-out is bound to _ and never read
+out[0] = x32; // high word
+out[1] = x28;
+out[2] = x24; // low word
+}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e150m3/fesub.h b/src/Specific/montgomery64_2e150m3/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m3/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // prototype only: output pointer, then two operands passed as three 64-bit words each. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m5/feadd.c b/src/Specific/montgomery64_2e150m5/feadd.c
new file mode 100644
index 000000000..d8642ec59
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by feadd below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // adds (x6:x7:x5) + (x10:x11:x9), words listed most significant first, then selects between the raw sum and sum - (2^150 - 5)
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low words, no carry-in
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16); // middle words plus carry
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // high words; x20 is the carry out of the 192-bit sum
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22); // trial subtraction of 0x3fffff:0xff..ff:0xff..fb (= 2^150 - 5), starting with the low word
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffff, &x28);
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // propagate through the carry word; only the final borrow x32 is kept
+{ uint64_t x33 = cmovznz(x32, x28, x19); // cmovznz is declared in liblow.h; presumably yields the 2nd argument when x32 == 0 and the 3rd otherwise -- confirm there
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // high word
+out[1] = x34;
+out[2] = x35; // low word
+}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e150m5/feadd.h b/src/Specific/montgomery64_2e150m5/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // prototype only: output pointer, then two operands passed as three 64-bit words each. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m5/femul.c b/src/Specific/montgomery64_2e150m5/femul.c
new file mode 100644
index 000000000..3f8eed0bd
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by femul below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // interleaved multiply-and-reduce of (x6:x7:x5) by (x10:x11:x9) against the constant 2^150 - 5: one row per word of the first operand, each followed by a word-wise reduction step
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // row 0: x5 (low word of the first operand) times x9, x11, x10 (low to high words of the second); _mulx_u64 returns the low half and stores the high half
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // combine overlapping high/low halves into the row's columns
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // top column = carry x26 + high half x20; the carry-out is bound to _ and never read
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_); // reduction factor: low 64 bits of x13 * 0xcccccccccccccccd (5 * 0xcccccccccccccccd == 1 mod 2^64)
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35); // x31 times the constant words 0xff..fb, 0xff..ff, 0x3fffff (together 2^150 - 5)
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // lowest word of row + multiple is dropped (it is 0 mod 2^64 by the choice of x31); only carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55); // remaining columns become the running value, one word lower
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // row 1: x7 (middle word of the first operand) times the second operand
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate row 1 onto the running value x62:x61:x58:x55
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xcccccccccccccccdL, &_); // reduction step for row 1, same shape as for row 0
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffbL, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // merge the two top carries into one word
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // row 2: x6 (high word of the first operand) times the second operand
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate row 2 onto the running value x126:x124:x121:x118
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xcccccccccccccccdL, &_); // reduction step for row 2
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffbL, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // top carry word of the final running value
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffbL, &x192); // trial subtraction of 2^150 - 5 from x190:x188:x185:x182
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // only the final borrow x202 is kept
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz is declared in liblow.h; presumably yields the 2nd argument when x202 == 0 and the 3rd otherwise -- confirm there
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // high word
+out[1] = x204;
+out[2] = x205; // low word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e150m5/femul.h b/src/Specific/montgomery64_2e150m5/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // prototype only: output pointer, then two operands passed as three 64-bit words each. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m5/fenz.c b/src/Specific/montgomery64_2e150m5/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by fenz below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins (unused by fenz)
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // writes the bitwise OR of the three input words to out[0]
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5);
+out[0] = x6; // nonzero exactly when at least one input word is nonzero
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e150m5/fenz.h b/src/Specific/montgomery64_2e150m5/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // prototype only: output pointer, then one operand passed as three 64-bit words. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m5/feopp.c b/src/Specific/montgomery64_2e150m5/feopp.c
new file mode 100644
index 000000000..f00debb7e
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by feopp below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // computes 0 - (x3:x4:x2), words listed most significant first, then adds back 2^150 - 5 under a mask derived from the borrow
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // 0 - low word
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9);
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // x13 is the final borrow
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; cmovznz is declared in liblow.h -- presumably 0 when x13 == 0 and all-ones otherwise, confirm there
+{ uint64_t x15 = (x14 & 0xfffffffffffffffbL); // masked low word of the constant 0x3fffff:0xff..ff:0xff..fb (= 2^150 - 5)
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle word
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0x3fffff); // masked high word
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // the last carry-out is bound to _ and never read
+out[0] = x25; // high word
+out[1] = x21;
+out[2] = x17; // low word
+}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e150m5/feopp.h b/src/Specific/montgomery64_2e150m5/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // prototype only: output pointer, then one operand passed as three 64-bit words. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e150m5/fesub.c b/src/Specific/montgomery64_2e150m5/fesub.c
new file mode 100644
index 000000000..dccdfa918
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by fesub below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // computes (x6:x7:x5) - (x10:x11:x9), words listed most significant first, then adds back 2^150 - 5 under a mask derived from the borrow
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low words, no borrow-in
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x20 is the final borrow
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; cmovznz is declared in liblow.h -- presumably 0 when x20 == 0 and all-ones otherwise, confirm there
+{ uint64_t x22 = (x21 & 0xfffffffffffffffbL); // masked low word of the constant 0x3fffff:0xff..ff:0xff..fb (= 2^150 - 5)
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle word
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0x3fffff); // masked high word
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // the last carry-out is bound to _ and never read
+out[0] = x32; // high word
+out[1] = x28;
+out[2] = x24; // low word
+}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e150m5/fesub.h b/src/Specific/montgomery64_2e150m5/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e150m5/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // prototype only: output pointer, then two operands passed as three 64-bit words each. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e152m17/feadd.c b/src/Specific/montgomery64_2e152m17/feadd.c
new file mode 100644
index 000000000..417bc7169
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by feadd below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // adds (x6:x7:x5) + (x10:x11:x9), words listed most significant first, then selects between the raw sum and sum - (2^152 - 17)
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low words, no carry-in
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16); // middle words plus carry
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // high words; x20 is the carry out of the 192-bit sum
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffefL, &x22); // trial subtraction of 0xffffff:0xff..ff:0xff..ef (= 2^152 - 17), starting with the low word
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0xffffff, &x28);
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // propagate through the carry word; only the final borrow x32 is kept
+{ uint64_t x33 = cmovznz(x32, x28, x19); // cmovznz is declared in liblow.h; presumably yields the 2nd argument when x32 == 0 and the 3rd otherwise -- confirm there
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // high word
+out[1] = x34;
+out[2] = x35; // low word
+}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e152m17/feadd.h b/src/Specific/montgomery64_2e152m17/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // prototype only: output pointer, then two operands passed as three 64-bit words each. NOTE(review): the body is not in this header, so other translation units may be unable to honor always_inline -- confirm how it is consumed
diff --git a/src/Specific/montgomery64_2e152m17/femul.c b/src/Specific/montgomery64_2e152m17/femul.c
new file mode 100644
index 000000000..4c615812a
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type via GCC mode(TI); not referenced by femul below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on plain GCC (not clang/icc) the borrow intrinsics are redirected to the sbb builtins
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute asking for unconditional inlining
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // interleaved multiply-and-reduce of (x6:x7:x5) by (x10:x11:x9) against the constant 2^152 - 17: one row per word of the first operand, each followed by a word-wise reduction step
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // row 0: x5 (low word of the first operand) times x9, x11, x10 (low to high words of the second); _mulx_u64 returns the low half and stores the high half
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // combine overlapping high/low halves into the row's columns
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // top column = carry x26 + high half x20; the carry-out is bound to _ and never read
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xf0f0f0f0f0f0f0f1L, &_); // reduction factor: low 64 bits of x13 * 0xf0f0f0f0f0f0f0f1 (17 * 0xf0f0f0f0f0f0f0f1 == 1 mod 2^64)
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffefL, &x35); // x31 times the constant words 0xff..ef, 0xff..ff, 0xffffff (together 2^152 - 17)
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0xffffff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // lowest word of row + multiple is dropped (it is 0 mod 2^64 by the choice of x31); only carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55); // remaining columns become the running value, one word lower
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // row 1: x7 (middle word of the first operand) times the second operand
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate row 1 onto the running value x62:x61:x58:x55
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xf0f0f0f0f0f0f0f1L, &_); // reduction step for row 1, same shape as for row 0
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffefL, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0xffffff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // merge the two top carries into one word
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // row 2: x6 (high word of the first operand) times the second operand
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate row 2 onto the running value x126:x124:x121:x118
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xf0f0f0f0f0f0f0f1L, &_); // reduction step for row 2
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffefL, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0xffffff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // top carry word of the final running value
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffefL, &x192); // trial subtraction of 2^152 - 17 from x190:x188:x185:x182
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0xffffff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // only the final borrow x202 is kept
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz is declared in liblow.h; presumably yields the 2nd argument when x202 == 0 and the 3rd otherwise -- confirm there
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // high word
+out[1] = x204;
+out[2] = x205; // low word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3]; (filled most significant word first: out[0] high, out[2] low)
diff --git a/src/Specific/montgomery64_2e152m17/femul.h b/src/Specific/montgomery64_2e152m17/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e152m17/fenz.c b/src/Specific/montgomery64_2e152m17/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Stores the bitwise OR of the three input words in out[0]; the result is zero only when x2, x3 and x4 are all zero.
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5);
+out[0] = x6; // only out[0] is written
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e152m17/fenz.h b/src/Specific/montgomery64_2e152m17/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e152m17/feopp.c b/src/Specific/montgomery64_2e152m17/feopp.c
new file mode 100644
index 000000000..e543901b3
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Computes 0 - (x3:x4:x2) over three 64-bit words (x2 least significant), then adds the borrow-masked constant 0xffffff : 0xffffffffffffffff : 0xffffffffffffffef (these words equal 2^152 - 17, matching the directory name).
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // low word: 0 - x2, borrow out in x7
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // middle word, borrow chained from x7
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // top word; x13 is the final borrow
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask: presumably all-ones when x13 != 0, else 0 (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x15 = (x14 & 0xffffffffffffffefL); // masked low word of the constant
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle word (this AND leaves x14 unchanged)
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0xffffff); // masked top word of the constant
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the top word is discarded
+out[0] = x25; // most-significant word
+out[1] = x21;
+out[2] = x17; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e152m17/feopp.h b/src/Specific/montgomery64_2e152m17/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e152m17/fesub.c b/src/Specific/montgomery64_2e152m17/fesub.c
new file mode 100644
index 000000000..e6906e865
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Computes (x6:x7:x5) - (x10:x11:x9) over three 64-bit words (x5/x9 least significant), then adds the borrow-masked constant 0xffffff : 0xffffffffffffffff : 0xffffffffffffffef (these words equal 2^152 - 17, matching the directory name).
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low words, borrow out in x14
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // middle words, borrow chained
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // top words; x20 is the final borrow
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask: presumably all-ones when x20 != 0, else 0 (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x22 = (x21 & 0xffffffffffffffefL); // masked low word of the constant
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle word (this AND leaves x21 unchanged)
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0xffffff); // masked top word of the constant
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the top word is discarded
+out[0] = x32; // most-significant word
+out[1] = x28;
+out[2] = x24; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e152m17/fesub.h b/src/Specific/montgomery64_2e152m17/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e152m17/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e158m15/feadd.c b/src/Specific/montgomery64_2e158m15/feadd.c
new file mode 100644
index 000000000..dfea544b4
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Adds (x6:x7:x5) + (x10:x11:x9) over three 64-bit words (x5/x9 least significant), subtracts the constant 0x3fffffff : 0xffffffffffffffff : 0xfffffffffffffff1 (these words equal 2^158 - 15, matching the directory name), and outputs either the sum or the difference depending on the final borrow.
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low words, carry out in x14
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16); // middle words, carry chained
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // top words; x20 is the carry out of the sum
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffff1L, &x22); // trial subtraction of the constant, low word first
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffff, &x28);
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // borrow propagated through the sum's carry x20; only the final borrow x32 is kept
+{ uint64_t x33 = cmovznz(x32, x28, x19); // presumably the difference x28 when x32 == 0, else the sum x19 (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // most-significant word
+out[1] = x34;
+out[2] = x35; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e158m15/feadd.h b/src/Specific/montgomery64_2e158m15/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e158m15/femul.c b/src/Specific/montgomery64_2e158m15/femul.c
new file mode 100644
index 000000000..15e6f0071
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Three-word multiply of (x6:x7:x5) by (x10:x11:x9) with an interleaved reduction after each word of the first operand; this appears to be word-by-word Montgomery multiplication for 2^158 - 15 (per the directory name and the constants used). x5/x9 are the least-significant words.
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 times each word of the second operand (low half returned, high half via pointer)
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // combine overlapping halves into the 4-word product x28:x25:x22:x13
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // fold the last carry x26 into the top word
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor: low 64 bits of x13 * 0xeeeeeeeeeeeeeeef (15 * 0xeeeeeeeeeeeeeeef == 1 mod 2^64); high half discarded
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff1L, &x35); // x31 times each word of the constant 0x3fffffff : 0xffffffffffffffff : 0xfffffffffffffff1
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // add the two 4-word values; the low word of the sum is discarded, only its carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61); // running value after round 1: x62 (carry) : x61 : x58 : x55
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2: x7 times each word of the second operand
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate the round-2 product onto the running value
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor for round 2, derived from the new low word x82
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff1L, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // low word of the sum discarded again; carry x116 kept
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // merge the two pending carries into the top word of the running value
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3: x6 times each word of the second operand
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate the round-3 product onto the running value
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor for round 3, derived from the new low word x146
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff1L, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_); // low word of the sum discarded; carry x180 kept
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // merge the two pending carries; value before final correction is x190 : x188 : x185 : x182
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff1L, &x192); // trial subtraction of the constant, low word first
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // borrow propagated through the carry word x190; only the final borrow x202 is kept
+{ uint64_t x203 = cmovznz(x202, x198, x188); // presumably the difference when x202 == 0, else the unsubtracted word (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // most-significant word
+out[1] = x204;
+out[2] = x205; // least-significant word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e158m15/femul.h b/src/Specific/montgomery64_2e158m15/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e158m15/fenz.c b/src/Specific/montgomery64_2e158m15/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Stores the bitwise OR of the three input words in out[0]; the result is zero only when x2, x3 and x4 are all zero.
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5);
+out[0] = x6; // only out[0] is written
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e158m15/fenz.h b/src/Specific/montgomery64_2e158m15/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e158m15/feopp.c b/src/Specific/montgomery64_2e158m15/feopp.c
new file mode 100644
index 000000000..ebcf46f1d
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Computes 0 - (x3:x4:x2) over three 64-bit words (x2 least significant), then adds the borrow-masked constant 0x3fffffff : 0xffffffffffffffff : 0xfffffffffffffff1 (these words equal 2^158 - 15, matching the directory name).
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // low word: 0 - x2, borrow out in x7
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // middle word, borrow chained from x7
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // top word; x13 is the final borrow
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask: presumably all-ones when x13 != 0, else 0 (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x15 = (x14 & 0xfffffffffffffff1L); // masked low word of the constant
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle word (this AND leaves x14 unchanged)
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0x3fffffff); // masked top word of the constant
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the top word is discarded
+out[0] = x25; // most-significant word
+out[1] = x21;
+out[2] = x17; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e158m15/feopp.h b/src/Specific/montgomery64_2e158m15/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e158m15/fesub.c b/src/Specific/montgomery64_2e158m15/fesub.c
new file mode 100644
index 000000000..23dfa5006
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Computes (x6:x7:x5) - (x10:x11:x9) over three 64-bit words (x5/x9 least significant), then adds the borrow-masked constant 0x3fffffff : 0xffffffffffffffff : 0xfffffffffffffff1 (these words equal 2^158 - 15, matching the directory name).
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low words, borrow out in x14
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // middle words, borrow chained
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // top words; x20 is the final borrow
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask: presumably all-ones when x20 != 0, else 0 (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x22 = (x21 & 0xfffffffffffffff1L); // masked low word of the constant
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle word (this AND leaves x21 unchanged)
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0x3fffffff); // masked top word of the constant
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the top word is discarded
+out[0] = x32; // most-significant word
+out[1] = x28;
+out[2] = x24; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e158m15/fesub.h b/src/Specific/montgomery64_2e158m15/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e158m15/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e165m25/feadd.c b/src/Specific/montgomery64_2e165m25/feadd.c
new file mode 100644
index 000000000..e84dc499d
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Adds (x6:x7:x5) + (x10:x11:x9) over three 64-bit words (x5/x9 least significant), subtracts the constant 0x1fffffffff : 0xffffffffffffffff : 0xffffffffffffffe7 (these words equal 2^165 - 25, matching the directory name), and outputs either the sum or the difference depending on the final borrow.
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low words, carry out in x14
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16); // middle words, carry chained
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // top words; x20 is the carry out of the sum
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffe7L, &x22); // trial subtraction of the constant, low word first
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25);
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x1fffffffff, &x28);
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // borrow propagated through the sum's carry x20; only the final borrow x32 is kept
+{ uint64_t x33 = cmovznz(x32, x28, x19); // presumably the difference x28 when x32 == 0, else the sum x19 (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x34 = cmovznz(x32, x25, x16);
+{ uint64_t x35 = cmovznz(x32, x22, x13);
+out[0] = x33; // most-significant word
+out[1] = x34;
+out[2] = x35; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e165m25/feadd.h b/src/Specific/montgomery64_2e165m25/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e165m25/femul.c b/src/Specific/montgomery64_2e165m25/femul.c
new file mode 100644
index 000000000..44766da38
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Three-word multiply of (x6:x7:x5) by (x10:x11:x9) with an interleaved reduction after each word of the first operand; this appears to be word-by-word Montgomery multiplication for 2^165 - 25 (per the directory name and the constants used). x5/x9 are the least-significant words.
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 times each word of the second operand (low half returned, high half via pointer)
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // combine overlapping halves into the 4-word product x28:x25:x22:x13
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // fold the last carry x26 into the top word
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_); // reduction factor: low 64 bits of x13 * 0x8f5c28f5c28f5c29 (25 * 0x8f5c28f5c28f5c29 == 1 mod 2^64); high half discarded
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35); // x31 times each word of the constant 0x1fffffffff : 0xffffffffffffffff : 0xffffffffffffffe7
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fffffffff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // add the two 4-word values; the low word of the sum is discarded, only its carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61); // running value after round 1: x62 (carry) : x61 : x58 : x55
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2: x7 times each word of the second operand
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate the round-2 product onto the running value
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8f5c28f5c28f5c29L, &_); // reduction factor for round 2, derived from the new low word x82
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe7L, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fffffffff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // low word of the sum discarded again; carry x116 kept
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // merge the two pending carries into the top word of the running value
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3: x6 times each word of the second operand
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate the round-3 product onto the running value
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8f5c28f5c28f5c29L, &_); // reduction factor for round 3, derived from the new low word x146
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe7L, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fffffffff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_); // low word of the sum discarded; carry x180 kept
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // merge the two pending carries; value before final correction is x190 : x188 : x185 : x182
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe7L, &x192); // trial subtraction of the constant, low word first
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fffffffff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // borrow propagated through the carry word x190; only the final borrow x202 is kept
+{ uint64_t x203 = cmovznz(x202, x198, x188); // presumably the difference when x202 == 0, else the unsubtracted word (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // most-significant word
+out[1] = x204;
+out[2] = x205; // least-significant word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e165m25/femul.h b/src/Specific/montgomery64_2e165m25/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e165m25/fenz.c b/src/Specific/montgomery64_2e165m25/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Stores the bitwise OR of the three input words in out[0]; the result is zero only when x2, x3 and x4 are all zero.
+{ uint64_t x5 = (x4 | x3);
+{ uint64_t x6 = (x2 | x5);
+out[0] = x6; // only out[0] is written
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e165m25/fenz.h b/src/Specific/montgomery64_2e165m25/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e165m25/feopp.c b/src/Specific/montgomery64_2e165m25/feopp.c
new file mode 100644
index 000000000..86c44bbc9
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Computes 0 - (x3:x4:x2) over three 64-bit words (x2 least significant), then adds the borrow-masked constant 0x1fffffffff : 0xffffffffffffffff : 0xffffffffffffffe7 (these words equal 2^165 - 25, matching the directory name).
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // low word: 0 - x2, borrow out in x7
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // middle word, borrow chained from x7
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // top word; x13 is the final borrow
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask: presumably all-ones when x13 != 0, else 0 (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x15 = (x14 & 0xffffffffffffffe7L); // masked low word of the constant
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17);
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle word (this AND leaves x14 unchanged)
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0x1fffffffff); // masked top word of the constant
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // carry out of the top word is discarded
+out[0] = x25; // most-significant word
+out[1] = x21;
+out[2] = x17; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e165m25/feopp.h b/src/Specific/montgomery64_2e165m25/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e165m25/fesub.c b/src/Specific/montgomery64_2e165m25/fesub.c
new file mode 100644
index 000000000..a2b2ca75f
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Computes (x6:x7:x5) - (x10:x11:x9) over three 64-bit words (x5/x9 least significant), then adds the borrow-masked constant 0x1fffffffff : 0xffffffffffffffff : 0xffffffffffffffe7 (these words equal 2^165 - 25, matching the directory name).
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low words, borrow out in x14
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // middle words, borrow chained
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // top words; x20 is the final borrow
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask: presumably all-ones when x20 != 0, else 0 (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x22 = (x21 & 0xffffffffffffffe7L); // masked low word of the constant
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24);
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle word (this AND leaves x21 unchanged)
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0x1fffffffff); // masked top word of the constant
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // carry out of the top word is discarded
+out[0] = x32; // most-significant word
+out[1] = x28;
+out[2] = x24; // least-significant word
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e165m25/fesub.h b/src/Specific/montgomery64_2e165m25/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e165m25/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e166m5/feadd.c b/src/Specific/montgomery64_2e166m5/feadd.c
new file mode 100644
index 000000000..9d3fdb43c
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // 3-limb addition (x6:x7:x5) + (x10:x11:x9), limbs written high:mid:low, followed by one conditional subtraction of p = 2^166 - 5 (the value named by the montgomery64_2e166m5 directory)
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low limb: x13 = x5 + x9, x14 = carry out
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16); // middle limb: x16 = x7 + x11 + x14, x17 = carry out
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // high limb: x19 = x6 + x10 + x17; x20 = carry out of the 192-bit sum
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xfffffffffffffffbL, &x22); // trial subtraction of p, low limb: x13 - (2^64 - 5)
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25); // middle limb: x16 - (2^64 - 1) - x23
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffff, &x28); // high limb: x19 - (2^38 - 1) - x26
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // extends the borrow through the carry x20; x32 = 1 exactly when (x20:x19:x16:x13) < p; the difference word is never read
+{ uint64_t x33 = cmovznz(x32, x28, x19); // high limb; presumably x28 (reduced) when x32 == 0, else x19 (cmovznz is from liblow.h, not shown here - confirm)
+{ uint64_t x34 = cmovznz(x32, x25, x16); // middle limb, same selection
+{ uint64_t x35 = cmovznz(x32, x22, x13); // low limb, same selection
+out[0] = x33; // most significant limb
+out[1] = x34; // middle limb
+out[2] = x35; // least significant limb
+}}}}}}}}}} // closes the nested scopes opened above
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e166m5/feadd.h b/src/Specific/montgomery64_2e166m5/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e166m5/femul.c b/src/Specific/montgomery64_2e166m5/femul.c
new file mode 100644
index 000000000..276f293b4
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // word-by-word multiply-and-reduce of A = (x6:x7:x5) by B = (x10:x11:x9), limbs written high:mid:low, against p = 2^166 - 5 (montgomery64_2e166m5 directory); three rounds (x5, x7, x6), each adding a multiple of p that zeroes the low word, then one conditional subtraction of p
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1 (limb x5 of A): x5 times x9 -> x13 (low word), x14 (high word)
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17); // x5 times x11 -> x16 (low), x17 (high)
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20); // x5 times x10 -> x19 (low), x20 (high)
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // word 1 of x5 times B, carry x23
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25); // word 2 of x5 times B, carry x26
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // word 3 of x5 times B: x26 + x20; the carry out goes to `_` and is never read
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xcccccccccccccccdL, &_); // x31 = low 64 bits of x13 times the constant (high half unused); constant times 5 is 1 mod 2^64, so x31 times p is -x13 mod 2^64
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffbL, &x35); // x31 times low limb of p (2^64 - 5) -> x34 (low), x35 (high)
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38); // x31 times middle limb of p (2^64 - 1) -> x37 (low), x38 (high)
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffff, &x41); // x31 times high limb of p (2^38 - 1) -> x40 (low), x41 (high)
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43); // word 1 of x31 times p
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46); // word 2 of x31 times p
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49); // word 3 of x31 times p; carry out unused
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // x13 + x34: the sum word (zero mod 2^64 by the choice of x31) is discarded, only the carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55); // accumulator word 0 after dropping the zero word
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58); // accumulator word 1
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61); // accumulator word 2 (x61) and its carry x62
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2 (limb x7 of A): x7 times x9 -> x64 (low), x65 (high)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68); // x7 times x11 -> x67 (low), x68 (high)
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71); // x7 times x10 -> x70 (low), x71 (high)
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73); // word 1 of x7 times B
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76); // word 2 of x7 times B
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79); // word 3 of x7 times B; carry out unused
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // add x7 times B into the accumulator: word 0
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85); // word 1
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88); // word 2
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91); // word 3: previous carry x62 + x79, carry out x92
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xcccccccccccccccdL, &_); // x94 = low 64 bits of x82 times the same constant, so x94 times p is -x82 mod 2^64
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffbL, &x98); // x94 times low limb of p -> x97 (low), x98 (high)
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101); // x94 times middle limb of p -> x100 (low), x101 (high)
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffff, &x104); // x94 times high limb of p -> x103 (low), x104 (high)
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106); // word 1 of x94 times p
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109); // word 2 of x94 times p
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112); // word 3 of x94 times p; carry out unused
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // x82 + x97: zero word discarded, carry x116 kept
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118); // accumulator word 0 after dropping the zero word
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121); // accumulator word 1
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124); // accumulator word 2 (x124) and its carry x125
+{ uint8_t x126 = (x125 + x92); // top of the accumulator: the two carries x125 and x92 summed into a uint8_t
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3 (limb x6 of A): x6 times x9 -> x128 (low), x129 (high)
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132); // x6 times x11 -> x131 (low), x132 (high)
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135); // x6 times x10 -> x134 (low), x135 (high)
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137); // word 1 of x6 times B
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140); // word 2 of x6 times B
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143); // word 3 of x6 times B; carry out unused
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // add x6 times B into the accumulator: word 0
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149); // word 1
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152); // word 2
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155); // word 3: previous top x126 + x143, carry out x156
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xcccccccccccccccdL, &_); // x158 = low 64 bits of x146 times the same constant, so x158 times p is -x146 mod 2^64
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffbL, &x162); // x158 times low limb of p -> x161 (low), x162 (high)
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165); // x158 times middle limb of p -> x164 (low), x165 (high)
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffff, &x168); // x158 times high limb of p -> x167 (low), x168 (high)
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170); // word 1 of x158 times p
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173); // word 2 of x158 times p
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176); // word 3 of x158 times p; carry out unused
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_); // x146 + x161: zero word discarded, carry x180 kept
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182); // low limb of the unreduced result
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185); // middle limb of the unreduced result
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188); // high limb of the unreduced result, carry x189
+{ uint8_t x190 = (x189 + x156); // overflow word above the three result limbs
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffbL, &x192); // trial subtraction of p, low limb
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195); // trial subtraction of p, middle limb
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffff, &x198); // trial subtraction of p, high limb
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // extends the borrow through x190; x202 = 1 exactly when (x190:x188:x185:x182) < p; the difference word is never read
+{ uint64_t x203 = cmovznz(x202, x198, x188); // high limb; presumably x198 (reduced) when x202 == 0, else x188 (cmovznz is from liblow.h, not shown here - confirm)
+{ uint64_t x204 = cmovznz(x202, x195, x185); // middle limb, same selection
+{ uint64_t x205 = cmovznz(x202, x192, x182); // low limb, same selection
+out[0] = x203; // most significant limb
+out[1] = x204; // middle limb
+out[2] = x205; // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e166m5/femul.h b/src/Specific/montgomery64_2e166m5/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e166m5/fenz.c b/src/Specific/montgomery64_2e166m5/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // ORs the three input words together; out[0] ends up nonzero exactly when at least one input word is nonzero
+{ uint64_t x5 = (x4 | x3); // combine x4 and x3
+{ uint64_t x6 = (x2 | x5); // fold in x2
+out[0] = x6; // single-word result
+}} // closes the two nested scopes opened above
+// caller: uint64_t out[1]; only out[0] is written
diff --git a/src/Specific/montgomery64_2e166m5/fenz.h b/src/Specific/montgomery64_2e166m5/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e166m5/feopp.c b/src/Specific/montgomery64_2e166m5/feopp.c
new file mode 100644
index 000000000..e44b1e56a
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // 3-limb negation 0 - (x3:x4:x2), limbs written high:mid:low; p = 2^166 - 5 (the value named by the montgomery64_2e166m5 directory) is added back when the subtraction borrows
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // low limb: x6 = 0 - x2, x7 = borrow out
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // middle limb: x9 = 0 - x4 - x7, x10 = borrow out
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // high limb: x12 = 0 - x3 - x10; x13 = 1 when the input was nonzero
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; presumably 0 when x13 == 0 and all-ones otherwise (cmovznz comes from liblow.h, not shown here - confirm)
+{ uint64_t x15 = (x14 & 0xfffffffffffffffbL); // masked low limb of p: 2^64 - 5
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17); // low limb of the result, carry out in x18
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle limb of p: 2^64 - 1
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21); // middle limb of the result, carry out in x22
+{ uint64_t x23 = (x14 & 0x3fffffffff); // masked high limb of p: 2^38 - 1
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // high limb of the result; the final carry is stored in `_` and never read
+out[0] = x25; // most significant limb
+out[1] = x21; // middle limb
+out[2] = x17; // least significant limb
+}}}}}}}}}} // closes the nested scopes opened above
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e166m5/feopp.h b/src/Specific/montgomery64_2e166m5/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e166m5/fesub.c b/src/Specific/montgomery64_2e166m5/fesub.c
new file mode 100644
index 000000000..310529ada
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // 3-limb subtraction (x6:x7:x5) - (x10:x11:x9), limbs written high:mid:low; p = 2^166 - 5 (the value named by the montgomery64_2e166m5 directory) is added back when the subtraction borrows
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low limb: x13 = x5 - x9, x14 = borrow out
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // middle limb: x16 = x7 - x11 - x14, x17 = borrow out
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // high limb: x19 = x6 - x10 - x17; x20 = 1 when the 192-bit difference wrapped below zero
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; presumably 0 when x20 == 0 and all-ones otherwise (cmovznz comes from liblow.h, not shown here - confirm)
+{ uint64_t x22 = (x21 & 0xfffffffffffffffbL); // masked low limb of p: 2^64 - 5
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24); // low limb of the result, carry out in x25
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle limb of p: 2^64 - 1
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28); // middle limb of the result, carry out in x29
+{ uint64_t x30 = (x21 & 0x3fffffffff); // masked high limb of p: 2^38 - 1
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // high limb of the result; the final carry is stored in `_` and never read
+out[0] = x32; // most significant limb
+out[1] = x28; // middle limb
+out[2] = x24; // least significant limb
+}}}}}}}}}} // closes the nested scopes opened above
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e166m5/fesub.h b/src/Specific/montgomery64_2e166m5/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e166m5/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e171m19/feadd.c b/src/Specific/montgomery64_2e171m19/feadd.c
new file mode 100644
index 000000000..2b2f07eaa
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // 3-limb addition (x6:x7:x5) + (x10:x11:x9), limbs written high:mid:low, followed by one conditional subtraction of p = 2^171 - 19 (the value named by the montgomery64_2e171m19 directory)
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low limb: x13 = x5 + x9, x14 = carry out
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16); // middle limb: x16 = x7 + x11 + x14, x17 = carry out
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // high limb: x19 = x6 + x10 + x17; x20 = carry out of the 192-bit sum
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffedL, &x22); // trial subtraction of p, low limb: x13 - (2^64 - 19)
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25); // middle limb: x16 - (2^64 - 1) - x23
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x7ffffffffff, &x28); // high limb: x19 - (2^43 - 1) - x26
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // extends the borrow through the carry x20; x32 = 1 exactly when (x20:x19:x16:x13) < p; the difference word is never read
+{ uint64_t x33 = cmovznz(x32, x28, x19); // high limb; presumably x28 (reduced) when x32 == 0, else x19 (cmovznz is from liblow.h, not shown here - confirm)
+{ uint64_t x34 = cmovznz(x32, x25, x16); // middle limb, same selection
+{ uint64_t x35 = cmovznz(x32, x22, x13); // low limb, same selection
+out[0] = x33; // most significant limb
+out[1] = x34; // middle limb
+out[2] = x35; // least significant limb
+}}}}}}}}}} // closes the nested scopes opened above
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e171m19/feadd.h b/src/Specific/montgomery64_2e171m19/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e171m19/femul.c b/src/Specific/montgomery64_2e171m19/femul.c
new file mode 100644
index 000000000..5c3bb308e
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // word-by-word multiply-and-reduce of A = (x6:x7:x5) by B = (x10:x11:x9), limbs written high:mid:low, against p = 2^171 - 19 (montgomery64_2e171m19 directory); three rounds (x5, x7, x6), each adding a multiple of p that zeroes the low word, then one conditional subtraction of p
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1 (limb x5 of A): x5 times x9 -> x13 (low word), x14 (high word)
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17); // x5 times x11 -> x16 (low), x17 (high)
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20); // x5 times x10 -> x19 (low), x20 (high)
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // word 1 of x5 times B, carry x23
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25); // word 2 of x5 times B, carry x26
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // word 3 of x5 times B: x26 + x20; the carry out goes to `_` and is never read
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x86bca1af286bca1bL, &_); // x31 = low 64 bits of x13 times the constant (high half unused); constant times 19 is 1 mod 2^64, so x31 times p is -x13 mod 2^64
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffedL, &x35); // x31 times low limb of p (2^64 - 19) -> x34 (low), x35 (high)
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38); // x31 times middle limb of p (2^64 - 1) -> x37 (low), x38 (high)
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x7ffffffffff, &x41); // x31 times high limb of p (2^43 - 1) -> x40 (low), x41 (high)
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43); // word 1 of x31 times p
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46); // word 2 of x31 times p
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49); // word 3 of x31 times p; carry out unused
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // x13 + x34: the sum word (zero mod 2^64 by the choice of x31) is discarded, only the carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55); // accumulator word 0 after dropping the zero word
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58); // accumulator word 1
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61); // accumulator word 2 (x61) and its carry x62
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2 (limb x7 of A): x7 times x9 -> x64 (low), x65 (high)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68); // x7 times x11 -> x67 (low), x68 (high)
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71); // x7 times x10 -> x70 (low), x71 (high)
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73); // word 1 of x7 times B
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76); // word 2 of x7 times B
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79); // word 3 of x7 times B; carry out unused
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // add x7 times B into the accumulator: word 0
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85); // word 1
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88); // word 2
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91); // word 3: previous carry x62 + x79, carry out x92
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x86bca1af286bca1bL, &_); // x94 = low 64 bits of x82 times the same constant, so x94 times p is -x82 mod 2^64
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffedL, &x98); // x94 times low limb of p -> x97 (low), x98 (high)
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101); // x94 times middle limb of p -> x100 (low), x101 (high)
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x7ffffffffff, &x104); // x94 times high limb of p -> x103 (low), x104 (high)
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106); // word 1 of x94 times p
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109); // word 2 of x94 times p
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112); // word 3 of x94 times p; carry out unused
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // x82 + x97: zero word discarded, carry x116 kept
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118); // accumulator word 0 after dropping the zero word
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121); // accumulator word 1
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124); // accumulator word 2 (x124) and its carry x125
+{ uint8_t x126 = (x125 + x92); // top of the accumulator: the two carries x125 and x92 summed into a uint8_t
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3 (limb x6 of A): x6 times x9 -> x128 (low), x129 (high)
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132); // x6 times x11 -> x131 (low), x132 (high)
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135); // x6 times x10 -> x134 (low), x135 (high)
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137); // word 1 of x6 times B
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140); // word 2 of x6 times B
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143); // word 3 of x6 times B; carry out unused
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // add x6 times B into the accumulator: word 0
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149); // word 1
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152); // word 2
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155); // word 3: previous top x126 + x143, carry out x156
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x86bca1af286bca1bL, &_); // x158 = low 64 bits of x146 times the same constant, so x158 times p is -x146 mod 2^64
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffedL, &x162); // x158 times low limb of p -> x161 (low), x162 (high)
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165); // x158 times middle limb of p -> x164 (low), x165 (high)
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x7ffffffffff, &x168); // x158 times high limb of p -> x167 (low), x168 (high)
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170); // word 1 of x158 times p
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173); // word 2 of x158 times p
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176); // word 3 of x158 times p; carry out unused
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_); // x146 + x161: zero word discarded, carry x180 kept
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182); // low limb of the unreduced result
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185); // middle limb of the unreduced result
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188); // high limb of the unreduced result, carry x189
+{ uint8_t x190 = (x189 + x156); // overflow word above the three result limbs
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffedL, &x192); // trial subtraction of p, low limb
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195); // trial subtraction of p, middle limb
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x7ffffffffff, &x198); // trial subtraction of p, high limb
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // extends the borrow through x190; x202 = 1 exactly when (x190:x188:x185:x182) < p; the difference word is never read
+{ uint64_t x203 = cmovznz(x202, x198, x188); // high limb; presumably x198 (reduced) when x202 == 0, else x188 (cmovznz is from liblow.h, not shown here - confirm)
+{ uint64_t x204 = cmovznz(x202, x195, x185); // middle limb, same selection
+{ uint64_t x205 = cmovznz(x202, x192, x182); // low limb, same selection
+out[0] = x203; // most significant limb
+out[1] = x204; // middle limb
+out[2] = x205; // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e171m19/femul.h b/src/Specific/montgomery64_2e171m19/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e171m19/fenz.c b/src/Specific/montgomery64_2e171m19/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // ORs the three input words together; out[0] ends up nonzero exactly when at least one input word is nonzero
+{ uint64_t x5 = (x4 | x3); // combine x4 and x3
+{ uint64_t x6 = (x2 | x5); // fold in x2
+out[0] = x6; // single-word result
+}} // closes the two nested scopes opened above
+// caller: uint64_t out[1]; only out[0] is written
diff --git a/src/Specific/montgomery64_2e171m19/fenz.h b/src/Specific/montgomery64_2e171m19/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e171m19/feopp.c b/src/Specific/montgomery64_2e171m19/feopp.c
new file mode 100644
index 000000000..2f9d970ef
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // 3-limb negation 0 - (x3:x4:x2), limbs written high:mid:low; p = 2^171 - 19 (the value named by the montgomery64_2e171m19 directory) is added back when the subtraction borrows
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // low limb: x6 = 0 - x2, x7 = borrow out
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // middle limb: x9 = 0 - x4 - x7, x10 = borrow out
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // high limb: x12 = 0 - x3 - x10; x13 = 1 when the input was nonzero
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask; presumably 0 when x13 == 0 and all-ones otherwise (cmovznz comes from liblow.h, not shown here - confirm)
+{ uint64_t x15 = (x14 & 0xffffffffffffffedL); // masked low limb of p: 2^64 - 19
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17); // low limb of the result, carry out in x18
+{ uint64_t x19 = (x14 & 0xffffffffffffffffL); // masked middle limb of p: 2^64 - 1
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21); // middle limb of the result, carry out in x22
+{ uint64_t x23 = (x14 & 0x7ffffffffff); // masked high limb of p: 2^43 - 1
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // high limb of the result; the final carry is stored in `_` and never read
+out[0] = x25; // most significant limb
+out[1] = x21; // middle limb
+out[2] = x17; // least significant limb
+}}}}}}}}}} // closes the nested scopes opened above
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e171m19/feopp.h b/src/Specific/montgomery64_2e171m19/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e171m19/fesub.c b/src/Specific/montgomery64_2e171m19/fesub.c
new file mode 100644
index 000000000..e767e7877
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // 3-limb subtraction (x6:x7:x5) - (x10:x11:x9), limbs written high:mid:low; p = 2^171 - 19 (the value named by the montgomery64_2e171m19 directory) is added back when the subtraction borrows
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // low limb: x13 = x5 - x9, x14 = borrow out
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // middle limb: x16 = x7 - x11 - x14, x17 = borrow out
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // high limb: x19 = x6 - x10 - x17; x20 = 1 when the 192-bit difference wrapped below zero
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask; presumably 0 when x20 == 0 and all-ones otherwise (cmovznz comes from liblow.h, not shown here - confirm)
+{ uint64_t x22 = (x21 & 0xffffffffffffffedL); // masked low limb of p: 2^64 - 19
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24); // low limb of the result, carry out in x25
+{ uint64_t x26 = (x21 & 0xffffffffffffffffL); // masked middle limb of p: 2^64 - 1
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28); // middle limb of the result, carry out in x29
+{ uint64_t x30 = (x21 & 0x7ffffffffff); // masked high limb of p: 2^43 - 1
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // high limb of the result; the final carry is stored in `_` and never read
+out[0] = x32; // most significant limb
+out[1] = x28; // middle limb
+out[2] = x24; // least significant limb
+}}}}}}}}}} // closes the nested scopes opened above
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e171m19/fesub.h b/src/Specific/montgomery64_2e171m19/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e171m19/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e174m17/feadd.c b/src/Specific/montgomery64_2e174m17/feadd.c
new file mode 100644
index 000000000..f7638f962
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // 3-limb addition (x6:x7:x5) + (x10:x11:x9), limbs written high:mid:low, followed by one conditional subtraction of p = 2^174 - 17 (the value named by the montgomery64_2e174m17 directory)
+{ uint64_t x13; uint8_t x14 = _addcarryx_u64(0x0, x5, x9, &x13); // low limb: x13 = x5 + x9, x14 = carry out
+{ uint64_t x16; uint8_t x17 = _addcarryx_u64(x14, x7, x11, &x16); // middle limb: x16 = x7 + x11 + x14, x17 = carry out
+{ uint64_t x19; uint8_t x20 = _addcarryx_u64(x17, x6, x10, &x19); // high limb: x19 = x6 + x10 + x17; x20 = carry out of the 192-bit sum
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(0x0, x13, 0xffffffffffffffefL, &x22); // trial subtraction of p, low limb: x13 - (2^64 - 17)
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(x23, x16, 0xffffffffffffffffL, &x25); // middle limb: x16 - (2^64 - 1) - x23
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x19, 0x3fffffffffff, &x28); // high limb: x19 - (2^46 - 1) - x26
+{ uint64_t _; uint8_t x32 = _subborrow_u64(x29, x20, 0x0, &_); // extends the borrow through the carry x20; x32 = 1 exactly when (x20:x19:x16:x13) < p; the difference word is never read
+{ uint64_t x33 = cmovznz(x32, x28, x19); // high limb; presumably x28 (reduced) when x32 == 0, else x19 (cmovznz is from liblow.h, not shown here - confirm)
+{ uint64_t x34 = cmovznz(x32, x25, x16); // middle limb, same selection
+{ uint64_t x35 = cmovznz(x32, x22, x13); // low limb, same selection
+out[0] = x33; // most significant limb
+out[1] = x34; // middle limb
+out[2] = x35; // least significant limb
+}}}}}}}}}} // closes the nested scopes opened above
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e174m17/feadd.h b/src/Specific/montgomery64_2e174m17/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e174m17/femul.c b/src/Specific/montgomery64_2e174m17/femul.c
new file mode 100644
index 000000000..11665587a
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // word-by-word multiply-and-reduce of A = (x6:x7:x5) by B = (x10:x11:x9), limbs written high:mid:low, against p = 2^174 - 17 (montgomery64_2e174m17 directory); three rounds (x5, x7, x6), each adding a multiple of p that zeroes the low word, then one conditional subtraction of p
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1 (limb x5 of A): x5 times x9 -> x13 (low word), x14 (high word)
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17); // x5 times x11 -> x16 (low), x17 (high)
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20); // x5 times x10 -> x19 (low), x20 (high)
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // word 1 of x5 times B, carry x23
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25); // word 2 of x5 times B, carry x26
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28); // word 3 of x5 times B: x26 + x20; the carry out goes to `_` and is never read
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xf0f0f0f0f0f0f0f1L, &_); // x31 = low 64 bits of x13 times the constant (high half unused); constant times 17 is 1 mod 2^64, so x31 times p is -x13 mod 2^64
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffefL, &x35); // x31 times low limb of p (2^64 - 17) -> x34 (low), x35 (high)
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38); // x31 times middle limb of p (2^64 - 1) -> x37 (low), x38 (high)
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffff, &x41); // x31 times high limb of p (2^46 - 1) -> x40 (low), x41 (high)
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43); // word 1 of x31 times p
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46); // word 2 of x31 times p
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49); // word 3 of x31 times p; carry out unused
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // x13 + x34: the sum word (zero mod 2^64 by the choice of x31) is discarded, only the carry x53 is kept
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55); // accumulator word 0 after dropping the zero word
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58); // accumulator word 1
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61); // accumulator word 2 (x61) and its carry x62
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2 (limb x7 of A): x7 times x9 -> x64 (low), x65 (high)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68); // x7 times x11 -> x67 (low), x68 (high)
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71); // x7 times x10 -> x70 (low), x71 (high)
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73); // word 1 of x7 times B
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76); // word 2 of x7 times B
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79); // word 3 of x7 times B; carry out unused
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // add x7 times B into the accumulator: word 0
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85); // word 1
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88); // word 2
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91); // word 3: previous carry x62 + x79, carry out x92
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xf0f0f0f0f0f0f0f1L, &_); // x94 = low 64 bits of x82 times the same constant, so x94 times p is -x82 mod 2^64
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffefL, &x98); // x94 times low limb of p -> x97 (low), x98 (high)
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101); // x94 times middle limb of p -> x100 (low), x101 (high)
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffff, &x104); // x94 times high limb of p -> x103 (low), x104 (high)
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106); // word 1 of x94 times p
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109); // word 2 of x94 times p
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112); // word 3 of x94 times p; carry out unused
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // x82 + x97: zero word discarded, carry x116 kept
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118); // accumulator word 0 after dropping the zero word
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121); // accumulator word 1
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124); // accumulator word 2 (x124) and its carry x125
+{ uint8_t x126 = (x125 + x92); // top of the accumulator: the two carries x125 and x92 summed into a uint8_t
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3 (limb x6 of A): x6 times x9 -> x128 (low), x129 (high)
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132); // x6 times x11 -> x131 (low), x132 (high)
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135); // x6 times x10 -> x134 (low), x135 (high)
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137); // word 1 of x6 times B
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140); // word 2 of x6 times B
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143); // word 3 of x6 times B; carry out unused
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // add x6 times B into the accumulator: word 0
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149); // word 1
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152); // word 2
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155); // word 3: previous top x126 + x143, carry out x156
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xf0f0f0f0f0f0f0f1L, &_); // x158 = low 64 bits of x146 times the same constant, so x158 times p is -x146 mod 2^64
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffefL, &x162); // x158 times low limb of p -> x161 (low), x162 (high)
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165); // x158 times middle limb of p -> x164 (low), x165 (high)
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffff, &x168); // x158 times high limb of p -> x167 (low), x168 (high)
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170); // word 1 of x158 times p
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173); // word 2 of x158 times p
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176); // word 3 of x158 times p; carry out unused
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_); // x146 + x161: zero word discarded, carry x180 kept
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182); // low limb of the unreduced result
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185); // middle limb of the unreduced result
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188); // high limb of the unreduced result, carry x189
+{ uint8_t x190 = (x189 + x156); // overflow word above the three result limbs
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffefL, &x192); // trial subtraction of p, low limb
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195); // trial subtraction of p, middle limb
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffff, &x198); // trial subtraction of p, high limb
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_); // extends the borrow through x190; x202 = 1 exactly when (x190:x188:x185:x182) < p; the difference word is never read
+{ uint64_t x203 = cmovznz(x202, x198, x188); // high limb; presumably x198 (reduced) when x202 == 0, else x188 (cmovznz is from liblow.h, not shown here - confirm)
+{ uint64_t x204 = cmovznz(x202, x195, x185); // middle limb, same selection
+{ uint64_t x205 = cmovznz(x202, x192, x182); // low limb, same selection
+out[0] = x203; // most significant limb
+out[1] = x204; // middle limb
+out[2] = x205; // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3]; out[0] receives the high limb and out[2] the low limb
diff --git a/src/Specific/montgomery64_2e174m17/femul.h b/src/Specific/montgomery64_2e174m17/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration of femul (femul.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e174m17/fenz.c b/src/Specific/montgomery64_2e174m17/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// fenz: stores x2 | x4 | x3 in out[0], so out[0] is zero exactly when all three input words are zero.
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{
+  out[0] = x2 | (x4 | x3);
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e174m17/fenz.h b/src/Specific/montgomery64_2e174m17/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration of fenz (fenz.c in this directory), which stores the OR of x2, x3 and x4 in out[0].
diff --git a/src/Specific/montgomery64_2e174m17/feopp.c b/src/Specific/montgomery64_2e174m17/feopp.c
new file mode 100644
index 000000000..fd6a9ef94
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// feopp: out[0..2] (out[2] least significant) = 0 - (x3,x4,x2) as a 3-word subtraction, x2 least significant, plus (2^174 - 17) ANDed with a mask chosen from the final borrow.
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{
+  uint64_t neg_lo, neg_mid, neg_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, 0x0, x2, &neg_lo);   // subtract from zero, low word first
+  borrow = _subborrow_u64(borrow, 0x0, x4, &neg_mid);
+  borrow = _subborrow_u64(borrow, 0x0, x3, &neg_hi);
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);   // cmovznz (liblow.h) selects 0 or all-ones for this borrow
+  uint8_t carry = _addcarryx_u64(0x0, neg_lo, (mask & 0xffffffffffffffefL), &res_lo);   // add the masked constant, low word first
+  carry = _addcarryx_u64(carry, neg_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, neg_hi, (mask & 0x3fffffffffff), &res_hi);   // carry out of the top word is dropped
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e174m17/feopp.h b/src/Specific/montgomery64_2e174m17/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration of feopp (feopp.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e174m17/fesub.c b/src/Specific/montgomery64_2e174m17/fesub.c
new file mode 100644
index 000000000..262cd52ed
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// fesub: out[0..2] (out[2] least significant) = (x6,x7,x5) - (x10,x11,x9) as a 3-word subtraction, x5/x9 least significant, plus (2^174 - 17) ANDed with a mask chosen from the final borrow.
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{
+  uint64_t dif_lo, dif_mid, dif_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, x5, x9, &dif_lo);   // subtract word by word, low word first
+  borrow = _subborrow_u64(borrow, x7, x11, &dif_mid);
+  borrow = _subborrow_u64(borrow, x6, x10, &dif_hi);
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);   // cmovznz (liblow.h) selects 0 or all-ones for this borrow
+  uint8_t carry = _addcarryx_u64(0x0, dif_lo, (mask & 0xffffffffffffffefL), &res_lo);   // add the masked constant, low word first
+  carry = _addcarryx_u64(carry, dif_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, dif_hi, (mask & 0x3fffffffffff), &res_hi);   // carry out of the top word is dropped
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e174m17/fesub.h b/src/Specific/montgomery64_2e174m17/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m17/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration of fesub (fesub.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e174m3/feadd.c b/src/Specific/montgomery64_2e174m3/feadd.c
new file mode 100644
index 000000000..29ed47397
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// feadd: 3-word sum (x6,x7,x5) + (x10,x11,x9), x5/x9 least significant, then a trial subtraction of 2^174 - 3; cmovznz (liblow.h) picks each out word from the difference or the raw sum per the final borrow.
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{
+  uint64_t sum_lo, sum_mid, sum_hi, dif_lo, dif_mid, dif_hi, discard;
+  uint8_t carry = _addcarryx_u64(0x0, x5, x9, &sum_lo);   // add word by word, low word first
+  carry = _addcarryx_u64(carry, x7, x11, &sum_mid);
+  carry = _addcarryx_u64(carry, x6, x10, &sum_hi);   // carry now holds the bit above sum_hi
+  uint8_t borrow = _subborrow_u64(0x0, sum_lo, 0xfffffffffffffffdL, &dif_lo);   // trial subtraction of the constant
+  borrow = _subborrow_u64(borrow, sum_mid, 0xffffffffffffffffL, &dif_mid);
+  borrow = _subborrow_u64(borrow, sum_hi, 0x3fffffffffff, &dif_hi);
+  borrow = _subborrow_u64(borrow, carry, 0x0, &discard);   // extend the subtraction over the carry word; only the borrow is kept
+  out[0] = cmovznz(borrow, dif_hi, sum_hi);   // out[0] is the most significant word
+  out[1] = cmovznz(borrow, dif_mid, sum_mid);
+  out[2] = cmovznz(borrow, dif_lo, sum_lo);
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e174m3/feadd.h b/src/Specific/montgomery64_2e174m3/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration of feadd (feadd.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e174m3/femul.c b/src/Specific/montgomery64_2e174m3/femul.c
new file mode 100644
index 000000000..972ea2079
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // femul: three rounds of "multiply one word of (x6,x7,x5) by (x10,x11,x9), accumulate, then add a multiple of 2^174 - 3 and drop the low word"; x5/x9 are least significant. NOTE(review): the path name indicates Montgomery-form arithmetic -- confirm against the generator.
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 * (x9, x11, x10); low halves returned, high halves stored through the pointer
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // merge overlapping halves: product words are x13 (lowest), x22, x25, x28
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0xaaaaaaaaaaaaaaabL, &_); // x31 = low 64 bits of x13 * 0xaaaaaaaaaaaaaaab; high half discarded
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffffdL, &x35); // x31 * (0x3fffffffffff, 0xff..ff, 0xff..fd), i.e. x31 * (2^174 - 3)
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // add that multiple to the product; the low-word sum is discarded, only its carry x53 is used
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2: x7 * (x9, x11, x10)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate onto the round-1 result (x55, x58, x61, carry x62)
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0xaaaaaaaaaaaaaaabL, &_); // same step as round 1, keyed on the new low word x82
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffffdL, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // low-word sum discarded again; carry x116 feeds the next word
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // combine the top carries of the two addition chains of this round
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3: x6 * (x9, x11, x10)
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate onto the round-2 result (x118, x121, x124, x126)
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0xaaaaaaaaaaaaaaabL, &_); // same step again, keyed on x146
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffffdL, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // word above x188, built from the two top carries
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffffdL, &x192); // trial subtraction of 2^174 - 3 from (x190, x188, x185, x182)
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz (liblow.h, not shown) picks each word from the difference or the unsubtracted value, keyed on the final borrow x202
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // out[0] is the most significant word, out[2] the least
+out[1] = x204;
+out[2] = x205;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e174m3/femul.h b/src/Specific/montgomery64_2e174m3/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration of femul (femul.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e174m3/fenz.c b/src/Specific/montgomery64_2e174m3/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// fenz: stores x2 | x4 | x3 in out[0], so out[0] is zero exactly when all three input words are zero.
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{
+  out[0] = x2 | (x4 | x3);
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e174m3/fenz.h b/src/Specific/montgomery64_2e174m3/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration of fenz (fenz.c in this directory), which stores the OR of x2, x3 and x4 in out[0].
diff --git a/src/Specific/montgomery64_2e174m3/feopp.c b/src/Specific/montgomery64_2e174m3/feopp.c
new file mode 100644
index 000000000..70ac07364
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// feopp: out[0..2] (out[2] least significant) = 0 - (x3,x4,x2) as a 3-word subtraction, x2 least significant, plus (2^174 - 3) ANDed with a mask chosen from the final borrow.
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{
+  uint64_t neg_lo, neg_mid, neg_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, 0x0, x2, &neg_lo);   // subtract from zero, low word first
+  borrow = _subborrow_u64(borrow, 0x0, x4, &neg_mid);
+  borrow = _subborrow_u64(borrow, 0x0, x3, &neg_hi);
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);   // cmovznz (liblow.h) selects 0 or all-ones for this borrow
+  uint8_t carry = _addcarryx_u64(0x0, neg_lo, (mask & 0xfffffffffffffffdL), &res_lo);   // add the masked constant, low word first
+  carry = _addcarryx_u64(carry, neg_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, neg_hi, (mask & 0x3fffffffffff), &res_hi);   // carry out of the top word is dropped
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e174m3/feopp.h b/src/Specific/montgomery64_2e174m3/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration of feopp (feopp.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e174m3/fesub.c b/src/Specific/montgomery64_2e174m3/fesub.c
new file mode 100644
index 000000000..32fe676c0
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// fesub: out[0..2] (out[2] least significant) = (x6,x7,x5) - (x10,x11,x9) as a 3-word subtraction, x5/x9 least significant, plus (2^174 - 3) ANDed with a mask chosen from the final borrow.
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{
+  uint64_t dif_lo, dif_mid, dif_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, x5, x9, &dif_lo);   // subtract word by word, low word first
+  borrow = _subborrow_u64(borrow, x7, x11, &dif_mid);
+  borrow = _subborrow_u64(borrow, x6, x10, &dif_hi);
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);   // cmovznz (liblow.h) selects 0 or all-ones for this borrow
+  uint8_t carry = _addcarryx_u64(0x0, dif_lo, (mask & 0xfffffffffffffffdL), &res_lo);   // add the masked constant, low word first
+  carry = _addcarryx_u64(carry, dif_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, dif_hi, (mask & 0x3fffffffffff), &res_hi);   // carry out of the top word is dropped
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e174m3/fesub.h b/src/Specific/montgomery64_2e174m3/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e174m3/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration of fesub (fesub.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e189m25/feadd.c b/src/Specific/montgomery64_2e189m25/feadd.c
new file mode 100644
index 000000000..436c948b4
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// feadd: 3-word sum (x6,x7,x5) + (x10,x11,x9), x5/x9 least significant, then a trial subtraction of 2^189 - 25; cmovznz (liblow.h) picks each out word from the difference or the raw sum per the final borrow.
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{
+  uint64_t sum_lo, sum_mid, sum_hi, dif_lo, dif_mid, dif_hi, discard;
+  uint8_t carry = _addcarryx_u64(0x0, x5, x9, &sum_lo);   // add word by word, low word first
+  carry = _addcarryx_u64(carry, x7, x11, &sum_mid);
+  carry = _addcarryx_u64(carry, x6, x10, &sum_hi);   // carry now holds the bit above sum_hi
+  uint8_t borrow = _subborrow_u64(0x0, sum_lo, 0xffffffffffffffe7L, &dif_lo);   // trial subtraction of the constant
+  borrow = _subborrow_u64(borrow, sum_mid, 0xffffffffffffffffL, &dif_mid);
+  borrow = _subborrow_u64(borrow, sum_hi, 0x1fffffffffffffff, &dif_hi);
+  borrow = _subborrow_u64(borrow, carry, 0x0, &discard);   // extend the subtraction over the carry word; only the borrow is kept
+  out[0] = cmovznz(borrow, dif_hi, sum_hi);   // out[0] is the most significant word
+  out[1] = cmovznz(borrow, dif_mid, sum_mid);
+  out[2] = cmovznz(borrow, dif_lo, sum_lo);
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e189m25/feadd.h b/src/Specific/montgomery64_2e189m25/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration of feadd (feadd.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e189m25/femul.c b/src/Specific/montgomery64_2e189m25/femul.c
new file mode 100644
index 000000000..0f9466fef
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // femul: three rounds of "multiply one word of (x6,x7,x5) by (x10,x11,x9), accumulate, then add a multiple of 2^189 - 25 and drop the low word"; x5/x9 are least significant. NOTE(review): the path name indicates Montgomery-form arithmetic -- confirm against the generator.
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // round 1: x5 * (x9, x11, x10); low halves returned, high halves stored through the pointer
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // merge overlapping halves: product words are x13 (lowest), x22, x25, x28
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x8f5c28f5c28f5c29L, &_); // x31 = low 64 bits of x13 * 0x8f5c28f5c28f5c29; high half discarded
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffe7L, &x35); // x31 * (0x1fffffffffffffff, 0xff..ff, 0xff..e7), i.e. x31 * (2^189 - 25)
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x1fffffffffffffff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // add that multiple to the product; the low-word sum is discarded, only its carry x53 is used
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // round 2: x7 * (x9, x11, x10)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // accumulate onto the round-1 result (x55, x58, x61, carry x62)
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x8f5c28f5c28f5c29L, &_); // same step as round 1, keyed on the new low word x82
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffe7L, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x1fffffffffffffff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_); // low-word sum discarded again; carry x116 feeds the next word
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // combine the top carries of the two addition chains of this round
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // round 3: x6 * (x9, x11, x10)
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146); // accumulate onto the round-2 result (x118, x121, x124, x126)
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x8f5c28f5c28f5c29L, &_); // same step again, keyed on x146
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffe7L, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x1fffffffffffffff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // word above x188, built from the two top carries
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffe7L, &x192); // trial subtraction of 2^189 - 25 from (x190, x188, x185, x182)
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x1fffffffffffffff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz (liblow.h, not shown) picks each word from the difference or the unsubtracted value, keyed on the final borrow x202
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203; // out[0] is the most significant word, out[2] the least
+out[1] = x204;
+out[2] = x205;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e189m25/femul.h b/src/Specific/montgomery64_2e189m25/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration of femul (femul.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e189m25/fenz.c b/src/Specific/montgomery64_2e189m25/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// fenz: stores x2 | x4 | x3 in out[0], so out[0] is zero exactly when all three input words are zero.
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{
+  out[0] = x2 | (x4 | x3);
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e189m25/fenz.h b/src/Specific/montgomery64_2e189m25/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration of fenz (fenz.c in this directory), which stores the OR of x2, x3 and x4 in out[0].
diff --git a/src/Specific/montgomery64_2e189m25/feopp.c b/src/Specific/montgomery64_2e189m25/feopp.c
new file mode 100644
index 000000000..42a1bc53f
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// feopp: out[0..2] (out[2] least significant) = 0 - (x3,x4,x2) as a 3-word subtraction, x2 least significant, plus (2^189 - 25) ANDed with a mask chosen from the final borrow.
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{
+  uint64_t neg_lo, neg_mid, neg_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, 0x0, x2, &neg_lo);   // subtract from zero, low word first
+  borrow = _subborrow_u64(borrow, 0x0, x4, &neg_mid);
+  borrow = _subborrow_u64(borrow, 0x0, x3, &neg_hi);
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);   // cmovznz (liblow.h) selects 0 or all-ones for this borrow
+  uint8_t carry = _addcarryx_u64(0x0, neg_lo, (mask & 0xffffffffffffffe7L), &res_lo);   // add the masked constant, low word first
+  carry = _addcarryx_u64(carry, neg_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, neg_hi, (mask & 0x1fffffffffffffff), &res_hi);   // carry out of the top word is dropped
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e189m25/feopp.h b/src/Specific/montgomery64_2e189m25/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration of feopp (feopp.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e189m25/fesub.c b/src/Specific/montgomery64_2e189m25/fesub.c
new file mode 100644
index 000000000..8274703eb
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+// fesub: out[0..2] (out[2] least significant) = (x6,x7,x5) - (x10,x11,x9) as a 3-word subtraction, x5/x9 least significant, plus (2^189 - 25) ANDed with a mask chosen from the final borrow.
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{
+  uint64_t dif_lo, dif_mid, dif_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, x5, x9, &dif_lo);   // subtract word by word, low word first
+  borrow = _subborrow_u64(borrow, x7, x11, &dif_mid);
+  borrow = _subborrow_u64(borrow, x6, x10, &dif_hi);
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);   // cmovznz (liblow.h) selects 0 or all-ones for this borrow
+  uint8_t carry = _addcarryx_u64(0x0, dif_lo, (mask & 0xffffffffffffffe7L), &res_lo);   // add the masked constant, low word first
+  carry = _addcarryx_u64(carry, dif_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, dif_hi, (mask & 0x1fffffffffffffff), &res_hi);   // carry out of the top word is dropped
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e189m25/fesub.h b/src/Specific/montgomery64_2e189m25/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e189m25/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration of fesub (fesub.c in this directory); that file writes out[0..2] and notes the caller provides uint64_t out[3].
diff --git a/src/Specific/montgomery64_2e190m11/feadd.c b/src/Specific/montgomery64_2e190m11/feadd.c
new file mode 100644
index 000000000..34d22e878
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{ /* (x6, x7, x5) plus (x10, x11, x9); x5/x9 open the carry chain and out[0] receives the last limb of it. */
+  uint64_t sum_lo, sum_mid, sum_hi, red_lo, red_mid, red_hi, ignored;
+  uint8_t carry = _addcarryx_u64(0x0, x5, x9, &sum_lo);
+  carry = _addcarryx_u64(carry, x7, x11, &sum_mid);
+  carry = _addcarryx_u64(carry, x6, x10, &sum_hi);
+  /* Trial subtraction of 2^190 - 11 from the sum, with the carry-out treated as a fourth limb. */
+  uint8_t borrow = _subborrow_u64(0x0, sum_lo, 0xfffffffffffffff5L, &red_lo);
+  borrow = _subborrow_u64(borrow, sum_mid, 0xffffffffffffffffL, &red_mid);
+  borrow = _subborrow_u64(borrow, sum_hi, 0x3fffffffffffffff, &red_hi);
+  borrow = _subborrow_u64(borrow, carry, 0x0, &ignored);
+  out[0] = cmovznz(borrow, red_hi, sum_hi); /* cmovznz is from liblow.h (not shown); presumably 2nd argument when borrow is 0, else 3rd -- confirm */
+  out[1] = cmovznz(borrow, red_mid, sum_mid);
+  out[2] = cmovznz(borrow, red_lo, sum_lo);
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e190m11/feadd.h b/src/Specific/montgomery64_2e190m11/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e190m11/femul.c b/src/Specific/montgomery64_2e190m11/femul.c
new file mode 100644
index 000000000..4be4d3a3c
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies (x6, x7, x5) by (x10, x11, x9) one left-operand limb at a time, adding a multiple of 2^190 - 11 after each limb; out[0] receives the last limb of the chain.
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // Round 1: x5 times x9, x11, x10. _mulx_u64 returns the low 64 bits and writes the high 64 bits through its pointer argument.
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // Chain the partial products into the four limbs x13, x22, x25, x28.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x2e8ba2e8ba2e8ba3, &_); // x31 = low 64 bits of x13 * 0x2e8ba2e8ba2e8ba3; that constant times 11 is 1 mod 2^64.
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xfffffffffffffff5L, &x35); // x31 times the three limbs of 2^190 - 11, lowest limb first.
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x3fffffffffffffff, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // The low limb of this sum is 0 mod 2^64 by the choice of x31, so it is dropped into _ and only the carry x53 is kept.
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // Round 2: x7 times x9, x11, x10.
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // Add the round-2 product onto the running limbs x55, x58, x61 and the carry x62.
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x2e8ba2e8ba2e8ba3, &_); // Same multiplier derivation as x31, now from the low limb x82.
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xfffffffffffffff5L, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x3fffffffffffffff, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // Sum of the final carry bits of the two addition chains above.
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // Round 3: x6 times x9, x11, x10.
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x2e8ba2e8ba2e8ba3, &_); // Same multiplier derivation again, from the low limb x146.
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xfffffffffffffff5L, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x3fffffffffffffff, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // Sum of the final carry bits of the two addition chains above.
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xfffffffffffffff5L, &x192); // Trial subtraction of 2^190 - 11 from (x190, x188, x185, x182), lowest limb first.
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x3fffffffffffffff, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz is defined in liblow.h (not shown); presumably it returns the 2nd argument when x202 is 0 and the 3rd otherwise -- confirm.
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203;
+out[1] = x204;
+out[2] = x205;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e190m11/femul.h b/src/Specific/montgomery64_2e190m11/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e190m11/fenz.c b/src/Specific/montgomery64_2e190m11/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{
+  /* Bitwise OR of the three input limbs: out[0] ends up zero exactly when x2, x3 and x4 are all zero. */
+  out[0] = x2 | (x4 | x3);
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e190m11/fenz.h b/src/Specific/montgomery64_2e190m11/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e190m11/feopp.c b/src/Specific/montgomery64_2e190m11/feopp.c
new file mode 100644
index 000000000..87b15542f
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{ /* Zero minus (x3, x4, x2); x2 opens the borrow chain and out[0] receives the last limb of it. */
+  uint64_t neg_lo, neg_mid, neg_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, 0x0, x2, &neg_lo);
+  borrow = _subborrow_u64(borrow, 0x0, x4, &neg_mid);
+  borrow = _subborrow_u64(borrow, 0x0, x3, &neg_hi);
+  /* cmovznz comes from liblow.h (not shown); presumably the mask is 0 without a borrow and all-ones with one, so the limbs of 2^190 - 11 are added back only after a borrow -- confirm. */
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);
+  uint8_t carry = _addcarryx_u64(0x0, neg_lo, (mask & 0xfffffffffffffff5L), &res_lo);
+  carry = _addcarryx_u64(carry, neg_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, neg_hi, (mask & 0x3fffffffffffffff), &res_hi);
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e190m11/feopp.h b/src/Specific/montgomery64_2e190m11/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e190m11/fesub.c b/src/Specific/montgomery64_2e190m11/fesub.c
new file mode 100644
index 000000000..6199b9f1c
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{ /* (x6, x7, x5) minus (x10, x11, x9); x5/x9 open the borrow chain and out[0] receives the last limb of it. */
+  uint64_t diff_lo, diff_mid, diff_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, x5, x9, &diff_lo);
+  borrow = _subborrow_u64(borrow, x7, x11, &diff_mid);
+  borrow = _subborrow_u64(borrow, x6, x10, &diff_hi);
+  /* cmovznz comes from liblow.h (not shown); presumably the mask is 0 without a borrow and all-ones with one, so the limbs of 2^190 - 11 are added back only after a borrow -- confirm. */
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);
+  uint8_t carry = _addcarryx_u64(0x0, diff_lo, (mask & 0xfffffffffffffff5L), &res_lo);
+  carry = _addcarryx_u64(carry, diff_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, diff_hi, (mask & 0x3fffffffffffffff), &res_hi);
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e190m11/fesub.h b/src/Specific/montgomery64_2e190m11/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e190m11/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e191m19/feadd.c b/src/Specific/montgomery64_2e191m19/feadd.c
new file mode 100644
index 000000000..ca789cbcc
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{ /* (x6, x7, x5) plus (x10, x11, x9); x5/x9 open the carry chain and out[0] receives the last limb of it. */
+  uint64_t sum_lo, sum_mid, sum_hi, red_lo, red_mid, red_hi, ignored;
+  uint8_t carry = _addcarryx_u64(0x0, x5, x9, &sum_lo);
+  carry = _addcarryx_u64(carry, x7, x11, &sum_mid);
+  carry = _addcarryx_u64(carry, x6, x10, &sum_hi);
+  /* Trial subtraction of 2^191 - 19 from the sum, with the carry-out treated as a fourth limb. */
+  uint8_t borrow = _subborrow_u64(0x0, sum_lo, 0xffffffffffffffedL, &red_lo);
+  borrow = _subborrow_u64(borrow, sum_mid, 0xffffffffffffffffL, &red_mid);
+  borrow = _subborrow_u64(borrow, sum_hi, 0x7fffffffffffffffL, &red_hi);
+  borrow = _subborrow_u64(borrow, carry, 0x0, &ignored);
+  out[0] = cmovznz(borrow, red_hi, sum_hi); /* cmovznz is from liblow.h (not shown); presumably 2nd argument when borrow is 0, else 3rd -- confirm */
+  out[1] = cmovznz(borrow, red_mid, sum_mid);
+  out[2] = cmovznz(borrow, red_lo, sum_lo);
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e191m19/feadd.h b/src/Specific/montgomery64_2e191m19/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e191m19/femul.c b/src/Specific/montgomery64_2e191m19/femul.c
new file mode 100644
index 000000000..2d3408128
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/femul.c
@@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies (x6, x7, x5) by (x10, x11, x9) one left-operand limb at a time, adding a multiple of 2^191 - 19 after each limb; out[0] receives the last limb of the chain.
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // Round 1: x5 times x9, x11, x10. _mulx_u64 returns the low 64 bits and writes the high 64 bits through its pointer argument.
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // Chain the partial products into the four limbs x13, x22, x25, x28.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+{ uint64_t _; uint64_t x31 = _mulx_u64(x13, 0x86bca1af286bca1bL, &_); // x31 = low 64 bits of x13 * 0x86bca1af286bca1b; that constant times 19 is 1 mod 2^64.
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x31, 0xffffffffffffffedL, &x35); // x31 times the three limbs of 2^191 - 19, lowest limb first.
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x31, 0xffffffffffffffffL, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x31, 0x7fffffffffffffffL, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x35, x37, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x38, x40, &x46);
+{ uint64_t x49; uint8_t _ = _addcarryx_u64(0x0, x47, x41, &x49);
+{ uint64_t _; uint8_t x53 = _addcarryx_u64(0x0, x13, x34, &_); // The low limb of this sum is 0 mod 2^64 by the choice of x31, so it is dropped into _ and only the carry x53 is kept.
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x22, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x25, x46, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x28, x49, &x61);
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x9, &x65); // Round 2: x7 times x9, x11, x10.
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x11, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x7, x10, &x71);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(0x0, x65, x67, &x73);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u64(x74, x68, x70, &x76);
+{ uint64_t x79; uint8_t _ = _addcarryx_u64(0x0, x77, x71, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x55, x64, &x82); // Add the round-2 product onto the running limbs x55, x58, x61 and the carry x62.
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x61, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x62, x79, &x91);
+{ uint64_t _; uint64_t x94 = _mulx_u64(x82, 0x86bca1af286bca1bL, &_); // Same multiplier derivation as x31, now from the low limb x82.
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x94, 0xffffffffffffffedL, &x98);
+{ uint64_t x101; uint64_t x100 = _mulx_u64(x94, 0xffffffffffffffffL, &x101);
+{ uint64_t x104; uint64_t x103 = _mulx_u64(x94, 0x7fffffffffffffffL, &x104);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(0x0, x98, x100, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x101, x103, &x109);
+{ uint64_t x112; uint8_t _ = _addcarryx_u64(0x0, x110, x104, &x112);
+{ uint64_t _; uint8_t x116 = _addcarryx_u64(0x0, x82, x97, &_);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x85, x106, &x118);
+{ uint64_t x121; uint8_t x122 = _addcarryx_u64(x119, x88, x109, &x121);
+{ uint64_t x124; uint8_t x125 = _addcarryx_u64(x122, x91, x112, &x124);
+{ uint8_t x126 = (x125 + x92); // Sum of the final carry bits of the two addition chains above.
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x9, &x129); // Round 3: x6 times x9, x11, x10.
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x6, x11, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x6, x10, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x129, x131, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x132, x134, &x140);
+{ uint64_t x143; uint8_t _ = _addcarryx_u64(0x0, x141, x135, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(0x0, x118, x128, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x121, x137, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x124, x140, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x126, x143, &x155);
+{ uint64_t _; uint64_t x158 = _mulx_u64(x146, 0x86bca1af286bca1bL, &_); // Same multiplier derivation again, from the low limb x146.
+{ uint64_t x162; uint64_t x161 = _mulx_u64(x158, 0xffffffffffffffedL, &x162);
+{ uint64_t x165; uint64_t x164 = _mulx_u64(x158, 0xffffffffffffffffL, &x165);
+{ uint64_t x168; uint64_t x167 = _mulx_u64(x158, 0x7fffffffffffffffL, &x168);
+{ uint64_t x170; uint8_t x171 = _addcarryx_u64(0x0, x162, x164, &x170);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x165, x167, &x173);
+{ uint64_t x176; uint8_t _ = _addcarryx_u64(0x0, x174, x168, &x176);
+{ uint64_t _; uint8_t x180 = _addcarryx_u64(0x0, x146, x161, &_);
+{ uint64_t x182; uint8_t x183 = _addcarryx_u64(x180, x149, x170, &x182);
+{ uint64_t x185; uint8_t x186 = _addcarryx_u64(x183, x152, x173, &x185);
+{ uint64_t x188; uint8_t x189 = _addcarryx_u64(x186, x155, x176, &x188);
+{ uint8_t x190 = (x189 + x156); // Sum of the final carry bits of the two addition chains above.
+{ uint64_t x192; uint8_t x193 = _subborrow_u64(0x0, x182, 0xffffffffffffffedL, &x192); // Trial subtraction of 2^191 - 19 from (x190, x188, x185, x182), lowest limb first.
+{ uint64_t x195; uint8_t x196 = _subborrow_u64(x193, x185, 0xffffffffffffffffL, &x195);
+{ uint64_t x198; uint8_t x199 = _subborrow_u64(x196, x188, 0x7fffffffffffffffL, &x198);
+{ uint64_t _; uint8_t x202 = _subborrow_u64(x199, x190, 0x0, &_);
+{ uint64_t x203 = cmovznz(x202, x198, x188); // cmovznz is defined in liblow.h (not shown); presumably it returns the 2nd argument when x202 is 0 and the 3rd otherwise -- confirm.
+{ uint64_t x204 = cmovznz(x202, x195, x185);
+{ uint64_t x205 = cmovznz(x202, x192, x182);
+out[0] = x203;
+out[1] = x204;
+out[2] = x205;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e191m19/femul.h b/src/Specific/montgomery64_2e191m19/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e191m19/fenz.c b/src/Specific/montgomery64_2e191m19/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{
+  /* Bitwise OR of the three input limbs: out[0] ends up zero exactly when x2, x3 and x4 are all zero. */
+  out[0] = x2 | (x4 | x3);
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e191m19/fenz.h b/src/Specific/montgomery64_2e191m19/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e191m19/feopp.c b/src/Specific/montgomery64_2e191m19/feopp.c
new file mode 100644
index 000000000..9e4a88a93
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2)
+{ /* Zero minus (x3, x4, x2); x2 opens the borrow chain and out[0] receives the last limb of it. */
+  uint64_t neg_lo, neg_mid, neg_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, 0x0, x2, &neg_lo);
+  borrow = _subborrow_u64(borrow, 0x0, x4, &neg_mid);
+  borrow = _subborrow_u64(borrow, 0x0, x3, &neg_hi);
+  /* cmovznz comes from liblow.h (not shown); presumably the mask is 0 without a borrow and all-ones with one, so the limbs of 2^191 - 19 are added back only after a borrow -- confirm. */
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);
+  uint8_t carry = _addcarryx_u64(0x0, neg_lo, (mask & 0xffffffffffffffedL), &res_lo);
+  carry = _addcarryx_u64(carry, neg_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, neg_hi, (mask & 0x7fffffffffffffffL), &res_hi);
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e191m19/feopp.h b/src/Specific/montgomery64_2e191m19/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e191m19/fesub.c b/src/Specific/montgomery64_2e191m19/fesub.c
new file mode 100644
index 000000000..20d305083
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{ /* (x6, x7, x5) minus (x10, x11, x9); x5/x9 open the borrow chain and out[0] receives the last limb of it. */
+  uint64_t diff_lo, diff_mid, diff_hi, res_lo, res_mid, res_hi;
+  uint8_t borrow = _subborrow_u64(0x0, x5, x9, &diff_lo);
+  borrow = _subborrow_u64(borrow, x7, x11, &diff_mid);
+  borrow = _subborrow_u64(borrow, x6, x10, &diff_hi);
+  /* cmovznz comes from liblow.h (not shown); presumably the mask is 0 without a borrow and all-ones with one, so the limbs of 2^191 - 19 are added back only after a borrow -- confirm. */
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);
+  uint8_t carry = _addcarryx_u64(0x0, diff_lo, (mask & 0xffffffffffffffedL), &res_lo);
+  carry = _addcarryx_u64(carry, diff_mid, (mask & 0xffffffffffffffffL), &res_mid);
+  (void)_addcarryx_u64(carry, diff_hi, (mask & 0x7fffffffffffffffL), &res_hi);
+  out[0] = res_hi;
+  out[1] = res_mid;
+  out[2] = res_lo;
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e191m19/fesub.h b/src/Specific/montgomery64_2e191m19/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e191m19/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e192m2e64m1/feadd.c b/src/Specific/montgomery64_2e192m2e64m1/feadd.c
new file mode 100644
index 000000000..66b2d1360
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/feadd.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9)
+{ /* (x6, x7, x5) plus (x10, x11, x9); x5/x9 open the carry chain and out[0] receives the last limb of it. */
+  uint64_t sum_lo, sum_mid, sum_hi, red_lo, red_mid, red_hi, ignored;
+  uint8_t carry = _addcarryx_u64(0x0, x5, x9, &sum_lo);
+  carry = _addcarryx_u64(carry, x7, x11, &sum_mid);
+  carry = _addcarryx_u64(carry, x6, x10, &sum_hi);
+  /* Trial subtraction of 2^192 - 2^64 - 1 from the sum, with the carry-out treated as a fourth limb. */
+  uint8_t borrow = _subborrow_u64(0x0, sum_lo, 0xffffffffffffffffL, &red_lo);
+  borrow = _subborrow_u64(borrow, sum_mid, 0xfffffffffffffffeL, &red_mid);
+  borrow = _subborrow_u64(borrow, sum_hi, 0xffffffffffffffffL, &red_hi);
+  borrow = _subborrow_u64(borrow, carry, 0x0, &ignored);
+  out[0] = cmovznz(borrow, red_hi, sum_hi); /* cmovznz is from liblow.h (not shown); presumably 2nd argument when borrow is 0, else 3rd -- confirm */
+  out[1] = cmovznz(borrow, red_mid, sum_mid);
+  out[2] = cmovznz(borrow, red_lo, sum_lo);
+}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e192m2e64m1/feadd.h b/src/Specific/montgomery64_2e192m2e64m1/feadd.h
new file mode 100644
index 000000000..0e595561e
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/montgomery64_2e192m2e64m1/femul.c b/src/Specific/montgomery64_2e192m2e64m1/femul.c
new file mode 100644
index 000000000..b6a9f8dce
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/femul.c
@@ -0,0 +1,89 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies (x6, x7, x5) by (x10, x11, x9) one left-operand limb at a time, adding a multiple of 2^192 - 2^64 - 1 after each limb; out[0] receives the last limb of the chain.
+{ uint64_t x14; uint64_t x13 = _mulx_u64(x5, x9, &x14); // Round 1: x5 times x9, x11, x10. _mulx_u64 returns the low 64 bits and writes the high 64 bits through its pointer argument.
+{ uint64_t x17; uint64_t x16 = _mulx_u64(x5, x11, &x17);
+{ uint64_t x20; uint64_t x19 = _mulx_u64(x5, x10, &x20);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x14, x16, &x22); // Chain the partial products into the four limbs x13, x22, x25, x28.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(x23, x17, x19, &x25);
+{ uint64_t x28; uint8_t _ = _addcarryx_u64(0x0, x26, x20, &x28);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x13, 0xffffffffffffffffL, &x32); // x13 times the three limbs of 2^192 - 2^64 - 1, lowest limb first; x13 itself is the multiplier here.
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x13, 0xfffffffffffffffeL, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x13, 0xffffffffffffffffL, &x38);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(0x0, x32, x34, &x40);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x35, x37, &x43);
+{ uint64_t x46; uint8_t _ = _addcarryx_u64(0x0, x44, x38, &x46);
+{ uint64_t _; uint8_t x50 = _addcarryx_u64(0x0, x13, x31, &_); // x31 is x13 * (2^64 - 1) mod 2^64, so this low limb is 0 mod 2^64; it is dropped into _ and only the carry x50 is kept.
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x22, x40, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x25, x43, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x28, x46, &x58);
+{ uint64_t x62; uint64_t x61 = _mulx_u64(x7, x9, &x62); // Round 2: x7 times x9, x11, x10.
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x7, x11, &x65);
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x7, x10, &x68);
+{ uint64_t x70; uint8_t x71 = _addcarryx_u64(0x0, x62, x64, &x70);
+{ uint64_t x73; uint8_t x74 = _addcarryx_u64(x71, x65, x67, &x73);
+{ uint64_t x76; uint8_t _ = _addcarryx_u64(0x0, x74, x68, &x76);
+{ uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x52, x61, &x79); // Add the round-2 product onto the running limbs x52, x55, x58 and the carry x59.
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x55, x70, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x58, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x59, x76, &x88);
+{ uint64_t x92; uint64_t x91 = _mulx_u64(x79, 0xffffffffffffffffL, &x92); // Same step as for x13, now with the low limb x79 as the multiplier.
+{ uint64_t x95; uint64_t x94 = _mulx_u64(x79, 0xfffffffffffffffeL, &x95);
+{ uint64_t x98; uint64_t x97 = _mulx_u64(x79, 0xffffffffffffffffL, &x98);
+{ uint64_t x100; uint8_t x101 = _addcarryx_u64(0x0, x92, x94, &x100);
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x95, x97, &x103);
+{ uint64_t x106; uint8_t _ = _addcarryx_u64(0x0, x104, x98, &x106);
+{ uint64_t _; uint8_t x110 = _addcarryx_u64(0x0, x79, x91, &_);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x82, x100, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x85, x103, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x88, x106, &x118);
+{ uint8_t x120 = (x119 + x89); // Sum of the final carry bits of the two addition chains above.
+{ uint64_t x123; uint64_t x122 = _mulx_u64(x6, x9, &x123); // Round 3: x6 times x9, x11, x10.
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x6, x11, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x6, x10, &x129);
+{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x123, x125, &x131);
+{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x126, x128, &x134);
+{ uint64_t x137; uint8_t _ = _addcarryx_u64(0x0, x135, x129, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(0x0, x112, x122, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x115, x131, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x118, x134, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x120, x137, &x149);
+{ uint64_t x153; uint64_t x152 = _mulx_u64(x140, 0xffffffffffffffffL, &x153); // Same step again, with the low limb x140 as the multiplier.
+{ uint64_t x156; uint64_t x155 = _mulx_u64(x140, 0xfffffffffffffffeL, &x156);
+{ uint64_t x159; uint64_t x158 = _mulx_u64(x140, 0xffffffffffffffffL, &x159);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(0x0, x153, x155, &x161);
+{ uint64_t x164; uint8_t x165 = _addcarryx_u64(x162, x156, x158, &x164);
+{ uint64_t x167; uint8_t _ = _addcarryx_u64(0x0, x165, x159, &x167);
+{ uint64_t _; uint8_t x171 = _addcarryx_u64(0x0, x140, x152, &_);
+{ uint64_t x173; uint8_t x174 = _addcarryx_u64(x171, x143, x161, &x173);
+{ uint64_t x176; uint8_t x177 = _addcarryx_u64(x174, x146, x164, &x176);
+{ uint64_t x179; uint8_t x180 = _addcarryx_u64(x177, x149, x167, &x179);
+{ uint8_t x181 = (x180 + x150); // Sum of the final carry bits of the two addition chains above.
+{ uint64_t x183; uint8_t x184 = _subborrow_u64(0x0, x173, 0xffffffffffffffffL, &x183); // Trial subtraction of 2^192 - 2^64 - 1 from (x181, x179, x176, x173), lowest limb first.
+{ uint64_t x186; uint8_t x187 = _subborrow_u64(x184, x176, 0xfffffffffffffffeL, &x186);
+{ uint64_t x189; uint8_t x190 = _subborrow_u64(x187, x179, 0xffffffffffffffffL, &x189);
+{ uint64_t _; uint8_t x193 = _subborrow_u64(x190, x181, 0x0, &_);
+{ uint64_t x194 = cmovznz(x193, x189, x179); // cmovznz is defined in liblow.h (not shown); presumably it returns the 2nd argument when x193 is 0 and the 3rd otherwise -- confirm.
+{ uint64_t x195 = cmovznz(x193, x186, x176);
+{ uint64_t x196 = cmovznz(x193, x183, x173);
+out[0] = x194;
+out[1] = x195;
+out[2] = x196;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e192m2e64m1/femul.h b/src/Specific/montgomery64_2e192m2e64m1/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for femul: an output pointer followed by six 64-bit word arguments; the definition in femul.c ends by writing out[0], out[1] and out[2].
diff --git a/src/Specific/montgomery64_2e192m2e64m1/fenz.c b/src/Specific/montgomery64_2e192m2e64m1/fenz.c
new file mode 100644
index 000000000..e29935ffc
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/fenz.c
@@ -0,0 +1,24 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Stores x2 | x4 | x3 in out[0]; the stored word is zero exactly when all three input words are zero.
+{ uint64_t x5 = (x4 | x3); // OR of the second and first word arguments
+{ uint64_t x6 = (x2 | x5); // fold in the remaining word
+out[0] = x6; // only one output word is written
+}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e192m2e64m1/fenz.h b/src/Specific/montgomery64_2e192m2e64m1/fenz.h
new file mode 100644
index 000000000..5d5d18390
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype for fenz; the definition in fenz.c stores the bitwise OR of x3, x4 and x2 in out[0].
diff --git a/src/Specific/montgomery64_2e192m2e64m1/feopp.c b/src/Specific/montgomery64_2e192m2e64m1/feopp.c
new file mode 100644
index 000000000..1c0de56c9
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/feopp.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Subtracts the three input words from zero with a borrow chain, then adds back three constants masked by the final borrow; writes three words to out.
+{ uint64_t x6; uint8_t x7 = _subborrow_u64(0x0, 0x0, x2, &x6); // borrow chain starts here: 0 - x2, so x2 is the least significant limb
+{ uint64_t x9; uint8_t x10 = _subborrow_u64(x7, 0x0, x4, &x9); // 0 - x4 - previous borrow
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(x10, 0x0, x3, &x12); // 0 - x3 - previous borrow; x13 is the final borrow-out
+{ uint64_t x14 = (uint64_t)cmovznz(x13, 0x0, 0xffffffffffffffffL); // mask chosen between 0 and all-ones by x13; cmovznz comes from liblow.h (not shown) — presumably all-ones when x13 is nonzero, TODO confirm
+{ uint64_t x15 = (x14 & 0xffffffffffffffffL); // masked constant for the lowest limb
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x6, x15, &x17); // carry chain starts here: add the masked constants back limb by limb
+{ uint64_t x19 = (x14 & 0xfffffffffffffffeL); // masked constant for the middle limb
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(x18, x9, x19, &x21);
+{ uint64_t x23 = (x14 & 0xffffffffffffffffL); // masked constant for the highest limb; low-to-high the three constants are the 64-bit limbs of 2^192 - 2^64 - 1, matching the directory name
+{ uint64_t x25; uint8_t _ = _addcarryx_u64(x22, x12, x23, &x25); // final carry-out is discarded into _
+out[0] = x25; // limb produced last in the carry chain (most significant)
+out[1] = x21;
+out[2] = x17; // limb produced first in the carry chain (least significant)
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e192m2e64m1/feopp.h b/src/Specific/montgomery64_2e192m2e64m1/feopp.h
new file mode 100644
index 000000000..f75bc520e
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype for feopp; the definition in feopp.c subtracts (x3, x4, x2) from zero limb-wise and writes out[0], out[1] and out[2].
diff --git a/src/Specific/montgomery64_2e192m2e64m1/fesub.c b/src/Specific/montgomery64_2e192m2e64m1/fesub.c
new file mode 100644
index 000000000..0b3c8b1c0
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/fesub.c
@@ -0,0 +1,34 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Subtracts (x10, x11, x9) from (x6, x7, x5) with a borrow chain, then adds back three constants masked by the final borrow; writes three words to out.
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(0x0, x5, x9, &x13); // borrow chain starts here: x5 - x9 is the least significant limb
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, x7, x11, &x16); // x7 - x11 - previous borrow
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, x6, x10, &x19); // x6 - x10 - previous borrow; x20 is the final borrow-out
+{ uint64_t x21 = (uint64_t)cmovznz(x20, 0x0, 0xffffffffffffffffL); // mask chosen between 0 and all-ones by x20; cmovznz comes from liblow.h (not shown) — presumably all-ones when x20 is nonzero, TODO confirm
+{ uint64_t x22 = (x21 & 0xffffffffffffffffL); // masked constant for the lowest limb
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(0x0, x13, x22, &x24); // carry chain starts here: add the masked constants back limb by limb
+{ uint64_t x26 = (x21 & 0xfffffffffffffffeL); // masked constant for the middle limb
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x25, x16, x26, &x28);
+{ uint64_t x30 = (x21 & 0xffffffffffffffffL); // masked constant for the highest limb; low-to-high the three constants are the 64-bit limbs of 2^192 - 2^64 - 1, matching the directory name
+{ uint64_t x32; uint8_t _ = _addcarryx_u64(x29, x19, x30, &x32); // final carry-out is discarded into _
+out[0] = x32; // limb produced last in the carry chain (most significant)
+out[1] = x28;
+out[2] = x24; // limb produced first in the carry chain (least significant)
+}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e192m2e64m1/fesub.h b/src/Specific/montgomery64_2e192m2e64m1/fesub.h
new file mode 100644
index 000000000..ed365ec76
--- /dev/null
+++ b/src/Specific/montgomery64_2e192m2e64m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype for fesub; the definition in fesub.c subtracts (x10, x11, x9) from (x6, x7, x5) limb-wise and writes out[0], out[1] and out[2].
diff --git a/src/Specific/montgomery64_2e194m33/feadd.c b/src/Specific/montgomery64_2e194m33/feadd.c
new file mode 100644
index 000000000..077a1af52
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Adds (x8, x9, x7, x5) and (x14, x15, x13, x11) with a carry chain, trial-subtracts a four-limb constant, and picks one of the two results per limb; writes four words to out.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts here: x5 + x11 is the least significant limb
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // most significant limb; x27 is the carry out of the sum
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffdfL, &x29); // trial subtraction starts here; low-to-high the constants 0x...df, 0x...ff, 0x...ff, 0x3 are the 64-bit limbs of 2^194 - 33, matching the directory name
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extends the subtraction over the sum's carry x27; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // per-limb choice between the subtracted and unsubtracted value, driven by x42; cmovznz comes from liblow.h (not shown) — presumably the subtracted value when x42 is zero, TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // chosen most significant limb
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // chosen least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e194m33/feadd.h b/src/Specific/montgomery64_2e194m33/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype for feadd; the definition in feadd.c adds two four-limb operands and writes out[0] through out[3].
diff --git a/src/Specific/montgomery64_2e194m33/femul.c b/src/Specific/montgomery64_2e194m33/femul.c
new file mode 100644
index 000000000..774ec81d8
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/femul.c
@@ -0,0 +1,36 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // NOTE(review): this body is not valid C as committed — it stops after the first row of partial products and ends in unprinted generator output (see the out[] lines); it needs to be regenerated.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // 64x64 multiply: low half returned in x17, high half stored in x18
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21); // x5 times the next word of the second operand
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // carry chain: each high half is added to the next product's low half
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // folds the last carry x36 into the top high half x27; carry-out discarded
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xf83e0f83e0f83e1, &_); // keeps only the low 64 bits of x17 * constant; presumably the Montgomery reduction factor — TODO confirm
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffdfL, &x45); // x41 times 0x...df, the same constant feadd.c in this directory subtracts from the lowest limb
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51); // x29..x38 and x44..x51 are never read after this point
+out[0] = uint64_t x53; // NOTE(review): a declaration on the right-hand side of an assignment is a syntax error
+out[1] = uint8_t x54 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x41; // NOTE(review): "Op Syntax.MulSplit ..." is not C; it looks like an intermediate-representation term printed verbatim
+out[2] = 0x3;; // NOTE(review): stray second semicolon; 0x3 equals the top-limb constant used by feadd.c in this directory
+}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e194m33/femul.h b/src/Specific/montgomery64_2e194m33/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype for femul: an output pointer followed by eight 64-bit word arguments. NOTE(review): the femul.c added next to this header ends in non-C generator output and will not compile.
diff --git a/src/Specific/montgomery64_2e194m33/fenz.c b/src/Specific/montgomery64_2e194m33/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Stores x2 | x4 | x6 | x5 in out[0]; the stored word is zero exactly when all four input words are zero.
+{ uint64_t x7 = (x6 | x5); // OR of the second and first word arguments
+{ uint64_t x8 = (x4 | x7); // fold in the third word
+{ uint64_t x9 = (x2 | x8); // fold in the last word
+out[0] = x9; // only one output word is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e194m33/fenz.h b/src/Specific/montgomery64_2e194m33/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the definition in fenz.c stores the bitwise OR of x5, x6, x4 and x2 in out[0].
diff --git a/src/Specific/montgomery64_2e194m33/feopp.c b/src/Specific/montgomery64_2e194m33/feopp.c
new file mode 100644
index 000000000..0301c52c9
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Subtracts the four input words from zero with a borrow chain, then adds back four constants masked by the final borrow; writes four words to out.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts here: 0 - x2, so x2 is the least significant limb
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // most significant limb; x18 is the final borrow-out
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask chosen between 0 and all-ones by x18; cmovznz comes from liblow.h (not shown) — presumably all-ones when x18 is nonzero, TODO confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffffdfL); // masked constant for the lowest limb; low-to-high the constants 0x...df, 0x...ff, 0x...ff, 0x3 are the 64-bit limbs of 2^194 - 33, matching the directory name
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // carry chain starts here: add the masked constants back limb by limb
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint8_t x32 = ((uint8_t)x19 & 0x3); // top-limb constant fits in 8 bits, so the mask is truncated before the AND
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // final carry-out is discarded into _
+out[0] = x34; // limb produced last in the carry chain (most significant)
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // limb produced first in the carry chain (least significant)
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e194m33/feopp.h b/src/Specific/montgomery64_2e194m33/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for feopp; the definition in feopp.c subtracts (x5, x6, x4, x2) from zero limb-wise and writes out[0] through out[3].
diff --git a/src/Specific/montgomery64_2e194m33/fesub.c b/src/Specific/montgomery64_2e194m33/fesub.c
new file mode 100644
index 000000000..9c5537644
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtracts (x14, x15, x13, x11) from (x8, x9, x7, x5) with a borrow chain, then adds back four constants masked by the final borrow; writes four words to out.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts here: x5 - x11 is the least significant limb
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // most significant limb; x27 is the final borrow-out
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask chosen between 0 and all-ones by x27; cmovznz comes from liblow.h (not shown) — presumably all-ones when x27 is nonzero, TODO confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffffdfL); // masked constant for the lowest limb; low-to-high the constants 0x...df, 0x...ff, 0x...ff, 0x3 are the 64-bit limbs of 2^194 - 33, matching the directory name
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // carry chain starts here: add the masked constants back limb by limb
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint8_t x41 = ((uint8_t)x28 & 0x3); // top-limb constant fits in 8 bits, so the mask is truncated before the AND
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // final carry-out is discarded into _
+out[0] = x43; // limb produced last in the carry chain (most significant)
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // limb produced first in the carry chain (least significant)
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e194m33/fesub.h b/src/Specific/montgomery64_2e194m33/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e194m33/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype for fesub; the definition in fesub.c subtracts the second four-limb operand from the first and writes out[0] through out[3].
diff --git a/src/Specific/montgomery64_2e196m15/feadd.c b/src/Specific/montgomery64_2e196m15/feadd.c
new file mode 100644
index 000000000..c70f970dc
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Adds (x8, x9, x7, x5) and (x14, x15, x13, x11) with a carry chain, trial-subtracts a four-limb constant, and picks one of the two results per limb; writes four words to out.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts here: x5 + x11 is the least significant limb
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // most significant limb; x27 is the carry out of the sum
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff1L, &x29); // trial subtraction starts here; low-to-high the constants 0x...f1, 0x...ff, 0x...ff, 0xf are the 64-bit limbs of 2^196 - 15, matching the directory name
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xf, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extends the subtraction over the sum's carry x27; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // per-limb choice between the subtracted and unsubtracted value, driven by x42; cmovznz comes from liblow.h (not shown) — presumably the subtracted value when x42 is zero, TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // chosen most significant limb
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // chosen least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e196m15/feadd.h b/src/Specific/montgomery64_2e196m15/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype for feadd; the definition in feadd.c adds two four-limb operands and writes out[0] through out[3].
diff --git a/src/Specific/montgomery64_2e196m15/femul.c b/src/Specific/montgomery64_2e196m15/femul.c
new file mode 100644
index 000000000..61ba1a7e2
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/femul.c
@@ -0,0 +1,36 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // NOTE(review): this body is not valid C as committed — it stops after the first row of partial products and ends in unprinted generator output (see the out[] lines); it needs to be regenerated.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // 64x64 multiply: low half returned in x17, high half stored in x18
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21); // x5 times the next word of the second operand
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // carry chain: each high half is added to the next product's low half
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // folds the last carry x36 into the top high half x27; carry-out discarded
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xeeeeeeeeeeeeeeefL, &_); // keeps only the low 64 bits of x17 * constant; presumably the Montgomery reduction factor — TODO confirm
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff1L, &x45); // x41 times 0x...f1, the same constant feadd.c in this directory subtracts from the lowest limb
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51); // x29..x38 and x44..x51 are never read after this point
+out[0] = uint64_t x53; // NOTE(review): a declaration on the right-hand side of an assignment is a syntax error
+out[1] = uint8_t x54 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x41; // NOTE(review): "Op Syntax.MulSplit ..." is not C; it looks like an intermediate-representation term printed verbatim
+out[2] = 0xf;; // NOTE(review): stray second semicolon; 0xf equals the top-limb constant used by feadd.c in this directory
+}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e196m15/femul.h b/src/Specific/montgomery64_2e196m15/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype for femul: an output pointer followed by eight 64-bit word arguments. NOTE(review): the femul.c added next to this header ends in non-C generator output and will not compile.
diff --git a/src/Specific/montgomery64_2e196m15/fenz.c b/src/Specific/montgomery64_2e196m15/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Stores x2 | x4 | x6 | x5 in out[0]; the stored word is zero exactly when all four input words are zero.
+{ uint64_t x7 = (x6 | x5); // OR of the second and first word arguments
+{ uint64_t x8 = (x4 | x7); // fold in the third word
+{ uint64_t x9 = (x2 | x8); // fold in the last word
+out[0] = x9; // only one output word is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e196m15/fenz.h b/src/Specific/montgomery64_2e196m15/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the definition in fenz.c stores the bitwise OR of x5, x6, x4 and x2 in out[0].
diff --git a/src/Specific/montgomery64_2e196m15/feopp.c b/src/Specific/montgomery64_2e196m15/feopp.c
new file mode 100644
index 000000000..c78613e0e
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Subtracts the four input words from zero with a borrow chain, then adds back four constants masked by the final borrow; writes four words to out.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts here: 0 - x2, so x2 is the least significant limb
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // most significant limb; x18 is the final borrow-out
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask chosen between 0 and all-ones by x18; cmovznz comes from liblow.h (not shown) — presumably all-ones when x18 is nonzero, TODO confirm
+{ uint64_t x20 = (x19 & 0xfffffffffffffff1L); // masked constant for the lowest limb; low-to-high the constants 0x...f1, 0x...ff, 0x...ff, 0xf are the 64-bit limbs of 2^196 - 15, matching the directory name
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // carry chain starts here: add the masked constants back limb by limb
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint8_t x32 = ((uint8_t)x19 & 0xf); // top-limb constant fits in 8 bits, so the mask is truncated before the AND
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // final carry-out is discarded into _
+out[0] = x34; // limb produced last in the carry chain (most significant)
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // limb produced first in the carry chain (least significant)
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e196m15/feopp.h b/src/Specific/montgomery64_2e196m15/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for feopp; the definition in feopp.c subtracts (x5, x6, x4, x2) from zero limb-wise and writes out[0] through out[3].
diff --git a/src/Specific/montgomery64_2e196m15/fesub.c b/src/Specific/montgomery64_2e196m15/fesub.c
new file mode 100644
index 000000000..982a6ed70
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtracts (x14, x15, x13, x11) from (x8, x9, x7, x5) with a borrow chain, then adds back four constants masked by the final borrow; writes four words to out.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts here: x5 - x11 is the least significant limb
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // most significant limb; x27 is the final borrow-out
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask chosen between 0 and all-ones by x27; cmovznz comes from liblow.h (not shown) — presumably all-ones when x27 is nonzero, TODO confirm
+{ uint64_t x29 = (x28 & 0xfffffffffffffff1L); // masked constant for the lowest limb; low-to-high the constants 0x...f1, 0x...ff, 0x...ff, 0xf are the 64-bit limbs of 2^196 - 15, matching the directory name
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // carry chain starts here: add the masked constants back limb by limb
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint8_t x41 = ((uint8_t)x28 & 0xf); // top-limb constant fits in 8 bits, so the mask is truncated before the AND
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // final carry-out is discarded into _
+out[0] = x43; // limb produced last in the carry chain (most significant)
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // limb produced first in the carry chain (least significant)
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e196m15/fesub.h b/src/Specific/montgomery64_2e196m15/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e196m15/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype for fesub; the definition in fesub.c subtracts the second four-limb operand from the first and writes out[0] through out[3].
diff --git a/src/Specific/montgomery64_2e198m17/feadd.c b/src/Specific/montgomery64_2e198m17/feadd.c
new file mode 100644
index 000000000..7422664b9
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Adds (x8, x9, x7, x5) and (x14, x15, x13, x11) with a carry chain, trial-subtracts a four-limb constant, and picks one of the two results per limb; writes four words to out.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts here: x5 + x11 is the least significant limb
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // most significant limb; x27 is the carry out of the sum
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffefL, &x29); // trial subtraction starts here; low-to-high the constants 0x...ef, 0x...ff, 0x...ff, 0x3f are the 64-bit limbs of 2^198 - 17, matching the directory name
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3f, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extends the subtraction over the sum's carry x27; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // per-limb choice between the subtracted and unsubtracted value, driven by x42; cmovznz comes from liblow.h (not shown) — presumably the subtracted value when x42 is zero, TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // chosen most significant limb
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // chosen least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e198m17/feadd.h b/src/Specific/montgomery64_2e198m17/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype for feadd; the definition in feadd.c adds two four-limb operands and writes out[0] through out[3].
diff --git a/src/Specific/montgomery64_2e198m17/femul.c b/src/Specific/montgomery64_2e198m17/femul.c
new file mode 100644
index 000000000..0e4d843b8
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/femul.c
@@ -0,0 +1,36 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // NOTE(review): this body is not valid C as committed — it stops after the first row of partial products and ends in unprinted generator output (see the out[] lines); it needs to be regenerated.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // 64x64 multiply: low half returned in x17, high half stored in x18
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21); // x5 times the next word of the second operand
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // carry chain: each high half is added to the next product's low half
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // folds the last carry x36 into the top high half x27; carry-out discarded
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xf0f0f0f0f0f0f0f1L, &_); // keeps only the low 64 bits of x17 * constant; presumably the Montgomery reduction factor — TODO confirm
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffefL, &x45); // x41 times 0x...ef, the same constant feadd.c in this directory subtracts from the lowest limb
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51); // x29..x38 and x44..x51 are never read after this point
+out[0] = uint64_t x53; // NOTE(review): a declaration on the right-hand side of an assignment is a syntax error
+out[1] = uint8_t x54 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x41; // NOTE(review): "Op Syntax.MulSplit ..." is not C; it looks like an intermediate-representation term printed verbatim
+out[2] = 0x3f;; // NOTE(review): stray second semicolon; 0x3f equals the top-limb constant used by feadd.c in this directory
+}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e198m17/femul.h b/src/Specific/montgomery64_2e198m17/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype for femul: an output pointer followed by eight 64-bit word arguments. NOTE(review): the femul.c added next to this header ends in non-C generator output and will not compile.
diff --git a/src/Specific/montgomery64_2e198m17/fenz.c b/src/Specific/montgomery64_2e198m17/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Stores x2 | x4 | x6 | x5 in out[0]; the stored word is zero exactly when all four input words are zero.
+{ uint64_t x7 = (x6 | x5); // OR of the second and first word arguments
+{ uint64_t x8 = (x4 | x7); // fold in the third word
+{ uint64_t x9 = (x2 | x8); // fold in the last word
+out[0] = x9; // only one output word is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e198m17/fenz.h b/src/Specific/montgomery64_2e198m17/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only; the body is in fenz.c of this directory, which writes out[0] = x5 | x6 | x4 | x2
diff --git a/src/Specific/montgomery64_2e198m17/feopp.c b/src/Specific/montgomery64_2e198m17/feopp.c
new file mode 100644
index 000000000..67cf72890
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // negation: computes 0 - x over four 64-bit limbs, then adds back the masked constants 0xffffffffffffffef, 0xffffffffffffffff, 0xffffffffffffffff, 0x3f (low limb first; together 2^198 - 17)
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2 (no borrow in), so x2 is the least significant limb
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the borrow out of the most significant limb
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask built from the final borrow; presumably all-ones when x18 != 0 and 0 otherwise — cmovznz comes from liblow.h, confirm its argument order
+{ uint64_t x20 = (x19 & 0xffffffffffffffefL);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add-back carry chain, low limb first
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint8_t x32 = ((uint8_t)x19 & 0x3f); // top-limb constant 0x3f fits in 8 bits, so the mask is narrowed to uint8_t before masking
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the final carry out lands in _ and is never read
+out[0] = x34; // out is written most significant limb first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e198m17/feopp.h b/src/Specific/montgomery64_2e198m17/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only; the body is in feopp.c of this directory and writes out[0..3]. Its borrow chain starts at x2, so x2 is the least significant input limb and x5 the most significant
diff --git a/src/Specific/montgomery64_2e198m17/fesub.c b/src/Specific/montgomery64_2e198m17/fesub.c
new file mode 100644
index 000000000..5e0aee748
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // subtraction: (x8,x9,x7,x5) - (x14,x15,x13,x11) over four 64-bit limbs, then adds back the masked constants 0xffffffffffffffef, 0xffffffffffffffff, 0xffffffffffffffff, 0x3f (low limb first; together 2^198 - 17)
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts at x5/x11 (no borrow in), so these are the least significant limbs
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the borrow out of the most significant limb
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask built from the final borrow; presumably all-ones when x27 != 0 and 0 otherwise — cmovznz comes from liblow.h, confirm its argument order
+{ uint64_t x29 = (x28 & 0xffffffffffffffefL);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add-back carry chain, low limb first
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint8_t x41 = ((uint8_t)x28 & 0x3f); // top-limb constant 0x3f fits in 8 bits, so the mask is narrowed to uint8_t before masking
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the final carry out lands in _ and is never read
+out[0] = x43; // out is written most significant limb first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e198m17/fesub.h b/src/Specific/montgomery64_2e198m17/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e198m17/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the body is in fesub.c of this directory and writes out[0..3]. x8,x9,x7,x5 is the minuend and x14,x15,x13,x11 the subtrahend, each listed most significant limb first
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/feadd.c b/src/Specific/montgomery64_2e205m45x2e198m1/feadd.c
new file mode 100644
index 000000000..5757066bc
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // addition: (x8,x9,x7,x5) + (x14,x15,x13,x11) over four 64-bit limbs, followed by one trial subtraction of the constants 0xffffffffffffffff x3, 0x14bf (low limb first; together 2^205 - 45*2^198 - 1)
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts at x5/x11 (no carry in), so these are the least significant limbs
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the most significant limb
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29); // trial subtraction of the constants, low limb first
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x14bf, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extends the borrow chain through the carry word x27; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // per limb: picks the subtracted or the unsubtracted word based on x42; presumably the subtracted one when x42 == 0 — cmovznz comes from liblow.h, confirm its argument order
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // out is written most significant limb first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/feadd.h b/src/Specific/montgomery64_2e205m45x2e198m1/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the body is in feadd.c of this directory and writes out[0..3]. x8,x9,x7,x5 and x14,x15,x13,x11 are the two addends, each listed most significant limb first
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/femul.c b/src/Specific/montgomery64_2e205m45x2e198m1/femul.c
new file mode 100644
index 000000000..19f690c56
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/femul.c
@@ -0,0 +1,136 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // multiplication in four rounds, one per limb of the first operand (x5, x7, x9, x8 in that order); per the directory name this is word-by-word Montgomery multiplication for 2^205 - 45*2^198 - 1, whose limbs are 0xffffffffffffffff x3, 0x14bf (low first)
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each limb of the second operand; the return value is the low word, the pointer receives the high word
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch adjacent high/low partial-product words together
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // the last carry x36 is folded in as an addend to the top word
+{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42); // reduction: the low accumulator word x17 is multiplied directly by the four constant limbs (no separate multiplier step appears in this variant)
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0x14bf, &x51);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
+{ uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_); // the low sum word is discarded into _; only its carry x66 continues, which shifts the accumulator down one word
+{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
+{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81); // round 2: x7 times each limb of the second operand
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
+{ uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
+{ uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104); // add this round's product into the running accumulator
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
+{ uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120); // reduction for round 2, driven by the low accumulator word x104
+{ uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0x14bf, &x129);
+{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
+{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
+{ uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
+{ uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_); // low sum word discarded, carry kept
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
+{ uint8_t x157 = (x156 + x117); // the two carry-outs are summed into the extra top word that the next round adds in
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160); // round 3: x9 times each limb of the second operand
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
+{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
+{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
+{ uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199); // reduction for round 3, driven by the low accumulator word x183
+{ uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
+{ uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0x14bf, &x208);
+{ uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
+{ uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
+{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
+{ uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
+{ uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_); // low sum word discarded, carry kept
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
+{ uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
+{ uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
+{ uint8_t x236 = (x235 + x196); // combined carry word for the next round
+{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239); // round 4: x8 times each limb of the second operand
+{ uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
+{ uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
+{ uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
+{ uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
+{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+{ uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
+{ uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
+{ uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278); // reduction for round 4, driven by the low accumulator word x262
+{ uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
+{ uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
+{ uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0x14bf, &x287);
+{ uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
+{ uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
+{ uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_); // low sum word discarded, carry kept
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
+{ uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
+{ uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
+{ uint8_t x315 = (x314 + x275); // carry word above the four result limbs, consumed by the trial subtraction below
+{ uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317); // final trial subtraction of the constant limbs, low limb first
+{ uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
+{ uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
+{ uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0x14bf, &x326);
+{ uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_); // extends the borrow chain through the carry word; only the final borrow x330 is kept
+{ uint64_t x331 = cmovznz(x330, x326, x313); // per limb: picks the subtracted or the unsubtracted word based on x330; presumably the subtracted one when x330 == 0 — cmovznz comes from liblow.h, confirm its argument order
+{ uint64_t x332 = cmovznz(x330, x323, x310);
+{ uint64_t x333 = cmovznz(x330, x320, x307);
+{ uint64_t x334 = cmovznz(x330, x317, x304);
+out[0] = x331; // out is written most significant limb first
+out[1] = x332;
+out[2] = x333;
+out[3] = x334; // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/femul.h b/src/Specific/montgomery64_2e205m45x2e198m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the body is in femul.c of this directory and writes out[0..3]. x8,x9,x7,x5 and x14,x15,x13,x11 are the two factors, each listed most significant limb first
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/fenz.c b/src/Specific/montgomery64_2e205m45x2e198m1/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the four 64-bit inputs together: out[0] is zero exactly when every input word is zero
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7); // each nested brace scope introduces one temporary (generated-code layout)
+{ uint64_t x9 = (x2 | x8);
+out[0] = x9; // the only word written; the body contains no branches
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/fenz.h b/src/Specific/montgomery64_2e205m45x2e198m1/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only; the body is in fenz.c of this directory, which writes out[0] = x5 | x6 | x4 | x2
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/feopp.c b/src/Specific/montgomery64_2e205m45x2e198m1/feopp.c
new file mode 100644
index 000000000..69835ea8e
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // negation: computes 0 - x over four 64-bit limbs, then adds back the masked constants 0xffffffffffffffff x3, 0x14bf (low limb first; together 2^205 - 45*2^198 - 1)
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2 (no borrow in), so x2 is the least significant limb
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the borrow out of the most significant limb
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask built from the final borrow; presumably all-ones when x18 != 0 and 0 otherwise — cmovznz comes from liblow.h, confirm its argument order
+{ uint64_t x20 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add-back carry chain, low limb first
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x14bf); // masked top-limb constant
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the final carry out lands in _ and is never read
+out[0] = x34; // out is written most significant limb first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/feopp.h b/src/Specific/montgomery64_2e205m45x2e198m1/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only; the body is in feopp.c of this directory and writes out[0..3]. Its borrow chain starts at x2, so x2 is the least significant input limb and x5 the most significant
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/fesub.c b/src/Specific/montgomery64_2e205m45x2e198m1/fesub.c
new file mode 100644
index 000000000..e25106ebb
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // subtraction: (x8,x9,x7,x5) - (x14,x15,x13,x11) over four 64-bit limbs, then adds back the masked constants 0xffffffffffffffff x3, 0x14bf (low limb first; together 2^205 - 45*2^198 - 1)
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts at x5/x11 (no borrow in), so these are the least significant limbs
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the borrow out of the most significant limb
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask built from the final borrow; presumably all-ones when x27 != 0 and 0 otherwise — cmovznz comes from liblow.h, confirm its argument order
+{ uint64_t x29 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add-back carry chain, low limb first
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x14bf); // masked top-limb constant
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the final carry out lands in _ and is never read
+out[0] = x43; // out is written most significant limb first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e205m45x2e198m1/fesub.h b/src/Specific/montgomery64_2e205m45x2e198m1/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e205m45x2e198m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the body is in fesub.c of this directory and writes out[0..3]. x8,x9,x7,x5 is the minuend and x14,x15,x13,x11 the subtrahend, each listed most significant limb first
diff --git a/src/Specific/montgomery64_2e206m5/feadd.c b/src/Specific/montgomery64_2e206m5/feadd.c
new file mode 100644
index 000000000..fe9a1b3ac
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // addition: (x8,x9,x7,x5) + (x14,x15,x13,x11) over four 64-bit limbs, followed by one trial subtraction of the constants 0xfffffffffffffffb, 0xffffffffffffffff, 0xffffffffffffffff, 0x3fff (low limb first; together 2^206 - 5)
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts at x5/x11 (no carry in), so these are the least significant limbs
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the most significant limb
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffbL, &x29); // trial subtraction of the constants, low limb first
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fff, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extends the borrow chain through the carry word x27; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // per limb: picks the subtracted or the unsubtracted word based on x42; presumably the subtracted one when x42 == 0 — cmovznz comes from liblow.h, confirm its argument order
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // out is written most significant limb first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e206m5/feadd.h b/src/Specific/montgomery64_2e206m5/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the body is in feadd.c of this directory and writes out[0..3]. x8,x9,x7,x5 and x14,x15,x13,x11 are the two addends, each listed most significant limb first
diff --git a/src/Specific/montgomery64_2e206m5/femul.c b/src/Specific/montgomery64_2e206m5/femul.c
new file mode 100644
index 000000000..fdc1d8608
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // multiplication in four rounds, one per limb of the first operand (x5, x7, x9, x8 in that order); per the directory name this is word-by-word Montgomery multiplication for 2^206 - 5, whose limbs are 0xfffffffffffffffb, 0xffffffffffffffff, 0xffffffffffffffff, 0x3fff (low first)
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each limb of the second operand; the return value is the low word, the pointer receives the high word
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch adjacent high/low partial-product words together
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // the last carry x36 is folded in as an addend to the top word
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcccccccccccccccdL, &_); // reduction multiplier: low word of x17 * 0xcccccccccccccccd (high word discarded); 5 * 0xcccccccccccccccd == 1 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffbL, &x45); // multiplier x41 times each of the four constant limbs
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // the low sum word is discarded into _; only its carry x69 continues, which shifts the accumulator down one word
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each limb of the second operand
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add this round's product into the running accumulator
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcccccccccccccccdL, &_); // reduction multiplier for round 2, from the low accumulator word x107
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffbL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // low sum word discarded, carry kept
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // the two carry-outs are summed into the extra top word that the next round adds in
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each limb of the second operand
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcccccccccccccccdL, &_); // reduction multiplier for round 3, from the low accumulator word x189
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffbL, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_); // low sum word discarded, carry kept
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202); // combined carry word for the next round
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times each limb of the second operand
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcccccccccccccccdL, &_); // reduction multiplier for round 4, from the low accumulator word x271
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffbL, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_); // low sum word discarded, carry kept
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // carry word above the four result limbs, consumed by the trial subtraction below
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffbL, &x329); // final trial subtraction of the constant limbs, low limb first
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // extends the borrow chain through the carry word; only the final borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // per limb: picks the subtracted or the unsubtracted word based on x342; presumably the subtracted one when x342 == 0 — cmovznz comes from liblow.h, confirm its argument order
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // out is written most significant limb first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e206m5/femul.h b/src/Specific/montgomery64_2e206m5/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration of femul (defined in femul.c): takes eight 64-bit words by value and writes its result through out; femul.c notes the caller supplies uint64_t out[4]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e206m5/fenz.c b/src/Specific/montgomery64_2e206m5/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: stores the bitwise OR of the four input words in out[0], so out[0] is 0 exactly when all four inputs are 0.
+{ uint64_t x7 = (x6 | x5); // OR the first two words
+{ uint64_t x8 = (x4 | x7); // fold in the third word
+{ uint64_t x9 = (x2 | x8); // fold in the fourth word: x9 = x5 | x6 | x4 | x2
+out[0] = x9; // single-word result; only out[0] is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e206m5/fenz.h b/src/Specific/montgomery64_2e206m5/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration of fenz (defined in fenz.c): takes four 64-bit words by value and writes one word to out[0]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e206m5/feopp.c b/src/Specific/montgomery64_2e206m5/feopp.c
new file mode 100644
index 000000000..b1b027290
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // feopp: subtracts the 4-limb input (x2 = lowest limb, x5 = highest) from zero with a borrow chain, then adds the limbs 0x3fff : ff..ff : ff..ff : ff..fb (together 2^206 - 5) ANDed with the mask x19; writes four limbs to out, highest limb first.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // x8 = 0 - x2 (lowest limb); x9 = borrow out
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11); // next limb, consuming the previous borrow
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = final borrow of the 4-limb subtraction
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask selected by x18; presumably 0 when x18 == 0 and all-ones otherwise -- confirm cmovznz's argument order in liblow.h
+{ uint64_t x20 = (x19 & 0xfffffffffffffffbL); // masked lowest modulus limb
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // start of the add-back carry chain
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x3fff); // masked highest modulus limb
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // highest limb; the carry out is bound to `_` and never read
+out[0] = x34; // highest limb
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // lowest limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e206m5/feopp.h b/src/Specific/montgomery64_2e206m5/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration of feopp (defined in feopp.c): takes four 64-bit limbs by value and writes four limbs to out[0..3]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e206m5/fesub.c b/src/Specific/montgomery64_2e206m5/fesub.c
new file mode 100644
index 000000000..1d752a013
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // fesub: computes (x8:x9:x7:x5) - (x14:x15:x13:x11) limb by limb (x5 / x11 are the lowest limbs) with a borrow chain, then adds the limbs 0x3fff : ff..ff : ff..ff : ff..fb (together 2^206 - 5) ANDed with the mask x28; writes four limbs to out, highest limb first.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // lowest limb: x17 = x5 - x11; x18 = borrow out
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // highest limb; x27 = final borrow
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask selected by x27; presumably 0 when x27 == 0 and all-ones otherwise -- confirm cmovznz's argument order in liblow.h
+{ uint64_t x29 = (x28 & 0xfffffffffffffffbL); // masked lowest modulus limb
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // start of the add-back carry chain
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x3fff); // masked highest modulus limb
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // highest limb; the carry out is bound to `_` and never read
+out[0] = x43; // highest limb
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // lowest limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e206m5/fesub.h b/src/Specific/montgomery64_2e206m5/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e206m5/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration of fesub (defined in fesub.c): first operand limbs x8,x9,x7,x5 and second operand limbs x14,x15,x13,x11 by value; writes four limbs to out[0..3]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e212m29/feadd.c b/src/Specific/montgomery64_2e212m29/feadd.c
new file mode 100644
index 000000000..65a6b69be
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // feadd: adds (x8:x9:x7:x5) + (x14:x15:x13:x11) limb by limb (x5 / x11 are the lowest limbs), also computes that sum minus the limbs 0xfffff : ff..ff : ff..ff : ff..e3 (together 2^212 - 29), and uses the final borrow x42 to pick which of the two is written to out, highest limb first.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // lowest limb: x17 = x5 + x11; x18 = carry out
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // highest limb; x27 = carry out of the 4-limb sum
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffe3L, &x29); // trial subtraction of the modulus, lowest limb first
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xfffff, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extend the borrow chain through the carry word x27; only the borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // per limb: choose between the subtracted value and the plain sum based on x42; presumably the subtracted value when x42 == 0 -- confirm cmovznz's argument order in liblow.h
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // highest limb
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // lowest limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e212m29/feadd.h b/src/Specific/montgomery64_2e212m29/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration of feadd (defined in feadd.c): first operand limbs x8,x9,x7,x5 and second operand limbs x14,x15,x13,x11 by value; writes four limbs to out[0..3]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e212m29/femul.c b/src/Specific/montgomery64_2e212m29/femul.c
new file mode 100644
index 000000000..a021c5c22
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // femul: multiplies a = (x8:x9:x7:x5) by b = (x14:x15:x13:x11) (x5 / x11 are the lowest limbs) in four rounds, one per limb of a, each interleaved with a reduction step against the limbs 0xfffff : ff..ff : ff..ff : ff..e3 (together 2^212 - 29); ends with one trial subtraction of that modulus and writes four limbs to out, highest first. NOTE(review): the shape is word-by-word Montgomery multiplication, so the result is presumably a*b*2^-256 mod (2^212 - 29) -- confirm against the generator.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each limb of b; _mulx_u64 returns the low half and stores the high half through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // add each high half to the next low half, so x17,x29,x32,x35,x38 hold x5*b (lowest word first)
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word = carry x36 + high half x27; carry out bound to `_` and never read
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x34f72c234f72c235, &_); // reduction multiplier: low 64 bits of x17 * c, where 29 * c == 1 (mod 2^64); high half discarded
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffe3L, &x45); // x41 times each modulus limb
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xfffff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // chain the partial products of x41 * modulus
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // lowest word of the sum is discarded (x44 == -x17 mod 2^64 because the low modulus limb is -29 mod 2^64); only its carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71); // remaining words become the running value x71,x74,x77,x80
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // x81 = carry out of round 1
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each limb of b
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add x7*b onto the running value from round 1
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119); // previous round's carry x81 enters as the top word
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x34f72c234f72c235, &_); // reduction multiplier for round 2
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffe3L, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xfffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // lowest word discarded again, carry kept
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // combine the two carry-outs of this round into the top word for the next
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each limb of b
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189); // add x9*b onto the running value from round 2
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x34f72c234f72c235, &_); // reduction multiplier for round 3
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffe3L, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xfffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_); // lowest word discarded again, carry kept
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202); // combined carry word for round 4
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 (highest limb of a) times each limb of b
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271); // add x8*b onto the running value from round 3
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x34f72c234f72c235, &_); // reduction multiplier for round 4
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffe3L, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xfffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_); // lowest word discarded again, carry kept
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // carry word sitting above the four result limbs x316,x319,x322,x325
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffe3L, &x329); // trial subtraction of the modulus, lowest limb first
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xfffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // extend the borrow chain through the carry word; only the borrow x42-style flag x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // per limb: choose between the subtracted value and the unsubtracted one based on x342; presumably the subtracted value when x342 == 0 -- confirm cmovznz's argument order in liblow.h
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // highest limb
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e212m29/femul.h b/src/Specific/montgomery64_2e212m29/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration of femul (defined in femul.c): first operand limbs x8,x9,x7,x5 and second operand limbs x14,x15,x13,x11 by value; writes four limbs to out[0..3]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e212m29/fenz.c b/src/Specific/montgomery64_2e212m29/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: stores the bitwise OR of the four input words in out[0], so out[0] is 0 exactly when all four inputs are 0.
+{ uint64_t x7 = (x6 | x5); // OR the first two words
+{ uint64_t x8 = (x4 | x7); // fold in the third word
+{ uint64_t x9 = (x2 | x8); // fold in the fourth word: x9 = x5 | x6 | x4 | x2
+out[0] = x9; // single-word result; only out[0] is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e212m29/fenz.h b/src/Specific/montgomery64_2e212m29/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration of fenz (defined in fenz.c): takes four 64-bit words by value and writes one word to out[0]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e212m29/feopp.c b/src/Specific/montgomery64_2e212m29/feopp.c
new file mode 100644
index 000000000..83b1091d5
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // feopp: subtracts the 4-limb input (x2 = lowest limb, x5 = highest) from zero with a borrow chain, then adds the limbs 0xfffff : ff..ff : ff..ff : ff..e3 (together 2^212 - 29) ANDed with the mask x19; writes four limbs to out, highest limb first.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // x8 = 0 - x2 (lowest limb); x9 = borrow out
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11); // next limb, consuming the previous borrow
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = final borrow of the 4-limb subtraction
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask selected by x18; presumably 0 when x18 == 0 and all-ones otherwise -- confirm cmovznz's argument order in liblow.h
+{ uint64_t x20 = (x19 & 0xffffffffffffffe3L); // masked lowest modulus limb
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // start of the add-back carry chain
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0xfffff); // masked highest modulus limb
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // highest limb; the carry out is bound to `_` and never read
+out[0] = x34; // highest limb
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // lowest limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e212m29/feopp.h b/src/Specific/montgomery64_2e212m29/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration of feopp (defined in feopp.c): takes four 64-bit limbs by value and writes four limbs to out[0..3]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e212m29/fesub.c b/src/Specific/montgomery64_2e212m29/fesub.c
new file mode 100644
index 000000000..1bee2bc99
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // fesub: computes (x8:x9:x7:x5) - (x14:x15:x13:x11) limb by limb (x5 / x11 are the lowest limbs) with a borrow chain, then adds the limbs 0xfffff : ff..ff : ff..ff : ff..e3 (together 2^212 - 29) ANDed with the mask x28; writes four limbs to out, highest limb first.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // lowest limb: x17 = x5 - x11; x18 = borrow out
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // highest limb; x27 = final borrow
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask selected by x27; presumably 0 when x27 == 0 and all-ones otherwise -- confirm cmovznz's argument order in liblow.h
+{ uint64_t x29 = (x28 & 0xffffffffffffffe3L); // masked lowest modulus limb
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // start of the add-back carry chain
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0xfffff); // masked highest modulus limb
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // highest limb; the carry out is bound to `_` and never read
+out[0] = x43; // highest limb
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // lowest limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e212m29/fesub.h b/src/Specific/montgomery64_2e212m29/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e212m29/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration of fesub (defined in fesub.c): first operand limbs x8,x9,x7,x5 and second operand limbs x14,x15,x13,x11 by value; writes four limbs to out[0..3]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e213m3/feadd.c b/src/Specific/montgomery64_2e213m3/feadd.c
new file mode 100644
index 000000000..39bcecccd
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // feadd: adds (x8:x9:x7:x5) + (x14:x15:x13:x11) limb by limb (x5 / x11 are the lowest limbs), also computes that sum minus the limbs 0x1fffff : ff..ff : ff..ff : ff..fd (together 2^213 - 3), and uses the final borrow x42 to pick which of the two is written to out, highest limb first.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // lowest limb: x17 = x5 + x11; x18 = carry out
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // highest limb; x27 = carry out of the 4-limb sum
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffdL, &x29); // trial subtraction of the modulus, lowest limb first
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x1fffff, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extend the borrow chain through the carry word x27; only the borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // per limb: choose between the subtracted value and the plain sum based on x42; presumably the subtracted value when x42 == 0 -- confirm cmovznz's argument order in liblow.h
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // highest limb
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // lowest limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e213m3/feadd.h b/src/Specific/montgomery64_2e213m3/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration of feadd (defined in feadd.c): first operand limbs x8,x9,x7,x5 and second operand limbs x14,x15,x13,x11 by value; writes four limbs to out[0..3]. force_inline is the always_inline attribute #defined just above.
diff --git a/src/Specific/montgomery64_2e213m3/femul.c b/src/Specific/montgomery64_2e213m3/femul.c
new file mode 100644
index 000000000..ed8e86b46
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // femul: multiplies a = (x8:x9:x7:x5) by b = (x14:x15:x13:x11) (x5 / x11 are the lowest limbs) in four rounds, one per limb of a, each interleaved with a reduction step against the limbs 0x1fffff : ff..ff : ff..ff : ff..fd (together 2^213 - 3); ends with one trial subtraction of that modulus and writes four limbs to out, highest first. NOTE(review): the shape is word-by-word Montgomery multiplication, so the result is presumably a*b*2^-256 mod (2^213 - 3) -- confirm against the generator.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each limb of b; _mulx_u64 returns the low half and stores the high half through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // add each high half to the next low half, so x17,x29,x32,x35,x38 hold x5*b (lowest word first)
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word = carry x36 + high half x27; carry out bound to `_` and never read
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaaaaaaaaaaaaaaabL, &_); // reduction multiplier: low 64 bits of x17 * c, where 3 * c == 1 (mod 2^64); high half discarded
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffdL, &x45); // x41 times each modulus limb
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x1fffff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // chain the partial products of x41 * modulus
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // lowest word of the sum is discarded (x44 == -x17 mod 2^64 because the low modulus limb is -3 mod 2^64); only its carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71); // remaining words become the running value x71,x74,x77,x80
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // x81 = carry out of round 1
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each limb of b
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add x7*b onto the running value from round 1
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119); // previous round's carry x81 enters as the top word
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaaaaaaaaaaaaaaabL, &_); // reduction multiplier for round 2
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffdL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x1fffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // lowest word discarded again, carry kept
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // combine the two carry-outs of this round into the top word for the next
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each limb of b
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189); // add x9*b onto the running value from round 2
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaaaaaaaaaaaaaaabL, &_); // reduction multiplier for round 3
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffdL, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x1fffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_); // lowest word discarded again, carry kept
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202); // combined carry word for round 4
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 (highest limb of a) times each limb of b
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271); // add x8*b onto the running value from round 3
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaaaaaaaaaaaaaaabL, &_); // reduction multiplier for round 4
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffdL, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x1fffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_); // lowest word discarded again, carry kept
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // carry word sitting above the four result limbs x316,x319,x322,x325
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffdL, &x329); // trial subtraction of the modulus, lowest limb first
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x1fffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // extend the borrow chain through the carry word; only the borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // per limb: choose between the subtracted value and the unsubtracted one based on x342; presumably the subtracted value when x342 == 0 -- confirm cmovznz's argument order in liblow.h
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // highest limb
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e213m3/femul.h b/src/Specific/montgomery64_2e213m3/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the matching femul.c writes out[0..3], so out must hold 4 words. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e213m3/fenz.c b/src/Specific/montgomery64_2e213m3/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of the four input words to out[0]; the result is 0 exactly when every input word is 0.
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7); // fold in x4
+{ uint64_t x9 = (x2 | x8); // x9 == x2 | x4 | x6 | x5
+out[0] = x9; // only out[0] is written, so out needs room for one uint64_t
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e213m3/fenz.h b/src/Specific/montgomery64_2e213m3/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the matching fenz.c writes only out[0]. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e213m3/feopp.c b/src/Specific/montgomery64_2e213m3/feopp.c
new file mode 100644
index 000000000..8ea13e52a
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 - x over four 64-bit limbs (borrow chain order: x2, x4, x6, x5), then adds the masked constant 2^213 - 3 back when the subtraction borrowed.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // limb 0: 0 - x2, no incoming borrow
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11); // limb 1: 0 - x4 - borrow
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14); // limb 2: 0 - x6 - borrow
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // limb 3: 0 - x5 - borrow; x18 is the final borrow
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz is from liblow.h (not shown); presumably yields all-ones when x18 != 0 and 0 otherwise -- TODO confirm
+{ uint64_t x20 = (x19 & 0xfffffffffffffffdL); // constant limb 0 under the mask; the four constant limbs, low to high, encode 2^213 - 3
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL); // constant limb 1 under the mask
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL); // constant limb 2 under the mask
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x1fffff); // constant limb 3 (21 bits) under the mask
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top limb is discarded
+out[0] = x34; // highest limb is stored first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // lowest limb is stored last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e213m3/feopp.h b/src/Specific/montgomery64_2e213m3/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the matching feopp.c writes out[0..3], so out must hold 4 words. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e213m3/fesub.c b/src/Specific/montgomery64_2e213m3/fesub.c
new file mode 100644
index 000000000..1fd39286e
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Computes (x5, x7, x9, x8) - (x11, x13, x15, x14), limbs listed lowest first, then adds the masked constant 2^213 - 3 back when the subtraction borrowed.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // limb 0: x5 - x11, no incoming borrow
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20); // limb 1: x7 - x13 - borrow
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23); // limb 2: x9 - x15 - borrow
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // limb 3: x8 - x14 - borrow; x27 is the final borrow
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz is from liblow.h (not shown); presumably yields all-ones when x27 != 0 and 0 otherwise -- TODO confirm
+{ uint64_t x29 = (x28 & 0xfffffffffffffffdL); // constant limb 0 under the mask; the four constant limbs, low to high, encode 2^213 - 3
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL); // constant limb 1 under the mask
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL); // constant limb 2 under the mask
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x1fffff); // constant limb 3 (21 bits) under the mask
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top limb is discarded
+out[0] = x43; // highest limb is stored first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // lowest limb is stored last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e213m3/fesub.h b/src/Specific/montgomery64_2e213m3/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e213m3/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the matching fesub.c writes out[0..3], so out must hold 4 words. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e216m2e108m1/feadd.c b/src/Specific/montgomery64_2e216m2e108m1/feadd.c
new file mode 100644
index 000000000..2ca6e6702
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Adds (x5, x7, x9, x8) and (x11, x13, x15, x14), limbs listed lowest first, then selects between the raw sum and the sum minus the constant 2^216 - 2^108 - 1.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // limb 0: x5 + x11
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20); // limb 1: x7 + x13 + carry
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23); // limb 2: x9 + x15 + carry
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // limb 3: x8 + x14 + carry; x27 is the carry out of the 4-limb sum
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29); // trial subtraction of the constant, limb 0
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffefffffffffffL, &x32); // limb 1 of the constant has bit 44 clear (the -2^108 term)
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffff, &x38); // top constant limb is 24 bits wide
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // run the borrow through the carry x27; x42 != 0 means the sum was below the constant
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is from liblow.h (not shown); presumably returns its 2nd argument when x42 == 0 and its 3rd otherwise -- TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // highest limb is stored first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // lowest limb is stored last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e216m2e108m1/feadd.h b/src/Specific/montgomery64_2e216m2e108m1/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the matching feadd.c writes out[0..3], so out must hold 4 words. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e216m2e108m1/femul.c b/src/Specific/montgomery64_2e216m2e108m1/femul.c
new file mode 100644
index 000000000..837a92173
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/femul.c
@@ -0,0 +1,136 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Four rounds, one per limb of the first operand (x5, x7, x9, x8): multiply by (x11, x13, x15, x14), accumulate, then add a multiple of the constant 2^216 - 2^108 - 1 that clears the low word. Structure is consistent with word-by-word Montgomery multiplication (as the directory name suggests) -- TODO confirm.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each limb of the second operand; _mulx_u64 returns the low half and stores the high half through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch partial products: high half of one product plus low half of the next
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word: x27 plus the carry x36 passed as a data operand; carry out discarded
+{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42); // reduction for round 1: low word x17 times the four constant limbs
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffefffffffffffL, &x45);
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0xffffff, &x51);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
+{ uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_); // x41 is -x17 mod 2^64, so the low word of this sum is 0; only the carry x66 is kept
+{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77);
+{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81); // round 2: x7 times each limb of the second operand
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
+{ uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
+{ uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104); // add the round-2 products onto the running value from round 1
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
+{ uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120); // reduction for round 2, driven by the low word x104
+{ uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffefffffffffffL, &x123);
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0xffffff, &x129);
+{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
+{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
+{ uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
+{ uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_); // low word discarded, carry kept
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
+{ uint8_t x157 = (x156 + x117); // sum of the two carry bits; fed into the next round as a data operand
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160); // round 3: x9 times each limb of the second operand
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
+{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
+{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
+{ uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199); // reduction for round 3, driven by the low word x183
+{ uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffefffffffffffL, &x202);
+{ uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0xffffff, &x208);
+{ uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
+{ uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
+{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
+{ uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
+{ uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_); // low word discarded, carry kept
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
+{ uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
+{ uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
+{ uint8_t x236 = (x235 + x196); // sum of the two carry bits for the next round
+{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239); // round 4: x8 times each limb of the second operand
+{ uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
+{ uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
+{ uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
+{ uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
+{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+{ uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
+{ uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
+{ uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278); // reduction for round 4, driven by the low word x262
+{ uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffefffffffffffL, &x281);
+{ uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
+{ uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0xffffff, &x287);
+{ uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
+{ uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
+{ uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_); // low word discarded, carry kept
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
+{ uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
+{ uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
+{ uint8_t x315 = (x314 + x275); // extra top word made of the two final carry bits
+{ uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317); // trial subtraction of the constant from (x304, x307, x310, x313)
+{ uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffefffffffffffL, &x320);
+{ uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
+{ uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0xffffff, &x326);
+{ uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_); // run the borrow through the top word x315; x330 != 0 means the value was below the constant
+{ uint64_t x331 = cmovznz(x330, x326, x313); // cmovznz is from liblow.h (not shown); presumably returns its 2nd argument when x330 == 0 and its 3rd otherwise -- TODO confirm
+{ uint64_t x332 = cmovznz(x330, x323, x310);
+{ uint64_t x333 = cmovznz(x330, x320, x307);
+{ uint64_t x334 = cmovznz(x330, x317, x304);
+out[0] = x331; // highest limb is stored first
+out[1] = x332;
+out[2] = x333;
+out[3] = x334; // lowest limb is stored last
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e216m2e108m1/femul.h b/src/Specific/montgomery64_2e216m2e108m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the matching femul.c writes out[0..3], so out must hold 4 words. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e216m2e108m1/fenz.c b/src/Specific/montgomery64_2e216m2e108m1/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of the four input words to out[0]; the result is 0 exactly when every input word is 0.
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7); // fold in x4
+{ uint64_t x9 = (x2 | x8); // x9 == x2 | x4 | x6 | x5
+out[0] = x9; // only out[0] is written, so out needs room for one uint64_t
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e216m2e108m1/fenz.h b/src/Specific/montgomery64_2e216m2e108m1/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the matching fenz.c writes only out[0]. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e216m2e108m1/feopp.c b/src/Specific/montgomery64_2e216m2e108m1/feopp.c
new file mode 100644
index 000000000..4a2e77a0c
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 - x over four 64-bit limbs (borrow chain order: x2, x4, x6, x5), then adds the masked constant 2^216 - 2^108 - 1 back when the subtraction borrowed.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // limb 0: 0 - x2, no incoming borrow
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11); // limb 1: 0 - x4 - borrow
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14); // limb 2: 0 - x6 - borrow
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // limb 3: 0 - x5 - borrow; x18 is the final borrow
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz is from liblow.h (not shown); presumably yields all-ones when x18 != 0 and 0 otherwise -- TODO confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffffffL); // constant limb 0 under the mask
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+{ uint64_t x24 = (x19 & 0xffffefffffffffffL); // constant limb 1 under the mask; bit 44 is clear (the -2^108 term)
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL); // constant limb 2 under the mask
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0xffffff); // constant limb 3 (24 bits) under the mask
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top limb is discarded
+out[0] = x34; // highest limb is stored first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // lowest limb is stored last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e216m2e108m1/feopp.h b/src/Specific/montgomery64_2e216m2e108m1/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the matching feopp.c writes out[0..3], so out must hold 4 words. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e216m2e108m1/fesub.c b/src/Specific/montgomery64_2e216m2e108m1/fesub.c
new file mode 100644
index 000000000..fdaea4034
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Computes (x5, x7, x9, x8) - (x11, x13, x15, x14), limbs listed lowest first, then adds the masked constant 2^216 - 2^108 - 1 back when the subtraction borrowed.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // limb 0: x5 - x11, no incoming borrow
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20); // limb 1: x7 - x13 - borrow
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23); // limb 2: x9 - x15 - borrow
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // limb 3: x8 - x14 - borrow; x27 is the final borrow
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz is from liblow.h (not shown); presumably yields all-ones when x27 != 0 and 0 otherwise -- TODO confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffffffL); // constant limb 0 under the mask
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+{ uint64_t x33 = (x28 & 0xffffefffffffffffL); // constant limb 1 under the mask; bit 44 is clear (the -2^108 term)
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL); // constant limb 2 under the mask
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0xffffff); // constant limb 3 (24 bits) under the mask
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top limb is discarded
+out[0] = x43; // highest limb is stored first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // lowest limb is stored last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e216m2e108m1/fesub.h b/src/Specific/montgomery64_2e216m2e108m1/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e216m2e108m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the matching fesub.c writes out[0..3], so out must hold 4 words. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e221m3/feadd.c b/src/Specific/montgomery64_2e221m3/feadd.c
new file mode 100644
index 000000000..77bc256ad
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Adds (x5, x7, x9, x8) and (x11, x13, x15, x14), limbs listed lowest first, then selects between the raw sum and the sum minus the constant 2^221 - 3.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // limb 0: x5 + x11
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20); // limb 1: x7 + x13 + carry
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23); // limb 2: x9 + x15 + carry
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // limb 3: x8 + x14 + carry; x27 is the carry out of the 4-limb sum
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffdL, &x29); // trial subtraction of the constant, limb 0 (2^64 - 3)
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x1fffffff, &x38); // top constant limb is 29 bits wide
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // run the borrow through the carry x27; x42 != 0 means the sum was below the constant
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is from liblow.h (not shown); presumably returns its 2nd argument when x42 == 0 and its 3rd otherwise -- TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // highest limb is stored first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // lowest limb is stored last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e221m3/feadd.h b/src/Specific/montgomery64_2e221m3/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the matching feadd.c writes out[0..3], so out must hold 4 words. NOTE(review): always_inline on a declaration with no visible body -- confirm callers in other translation units still compile under GCC.
diff --git a/src/Specific/montgomery64_2e221m3/femul.c b/src/Specific/montgomery64_2e221m3/femul.c
new file mode 100644
index 000000000..2ba283618
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Four rounds, one per limb of the first operand (x5, x7, x9, x8): multiply by (x11, x13, x15, x14), accumulate, then add a multiple of the constant 2^221 - 3 that clears the low word. Structure is consistent with word-by-word Montgomery multiplication (as the directory name suggests) -- TODO confirm.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each limb of the second operand; _mulx_u64 returns the low half and stores the high half through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch partial products: high half of one product plus low half of the next
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word: x27 plus the carry x36 passed as a data operand; carry out discarded
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor for round 1: low half of x17 * 0xaaaaaaaaaaaaaaab, where 3 * 0xaaaaaaaaaaaaaaab == 1 (mod 2^64); high half discarded
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffdL, &x45); // x41 times the four constant limbs
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x1fffffff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // x44 is -3 * x41 == -x17 (mod 2^64), so the low word of this sum is 0; only the carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each limb of the second operand
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add the round-2 products onto the running value from round 1
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor for round 2, from the low word x107
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffdL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x1fffffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // low word discarded, carry kept
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // sum of the two carry bits; fed into the next round as a data operand
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each limb of the second operand
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor for round 3, from the low word x189
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffdL, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x1fffffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_); // low word discarded, carry kept
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202); // sum of the two carry bits for the next round
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times each limb of the second operand
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaaaaaaaaaaaaaaabL, &_); // reduction factor for round 4, from the low word x271
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffdL, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x1fffffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_); // low word discarded, carry kept
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // extra top word made of the two final carry bits
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffdL, &x329); // trial subtraction of the constant from (x316, x319, x322, x325)
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x1fffffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // run the borrow through the top word x327; x342 != 0 means the value was below the constant
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz is from liblow.h (not shown); presumably returns its 2nd argument when x342 == 0 and its 3rd otherwise -- TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // highest limb is stored first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // lowest limb is stored last
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e221m3/femul.h b/src/Specific/montgomery64_2e221m3/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e221m3/fenz.c b/src/Specific/montgomery64_2e221m3/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of the four input words to out[0].
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8);
+out[0] = x9; // zero exactly when x5, x6, x4 and x2 are all zero
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e221m3/fenz.h b/src/Specific/montgomery64_2e221m3/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e221m3/feopp.c b/src/Specific/montgomery64_2e221m3/feopp.c
new file mode 100644
index 000000000..2a8bfeaee
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 - (x5,x6,x4,x2) word by word (x2 least significant), then adds the limbs of 2^221 - 3 masked by x19.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2 with no incoming borrow
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the borrow out of the top word
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz comes from liblow.h; presumably yields 0 when x18 == 0 and all-ones otherwise - confirm
+{ uint64_t x20 = (x19 & 0xfffffffffffffffdL); // lowest limb of 2^221 - 3, i.e. 2^64 - 3
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x1fffffff); // top limb, 2^29 - 1
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top word is discarded
+out[0] = x34; // most-significant word first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least-significant word last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e221m3/feopp.h b/src/Specific/montgomery64_2e221m3/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e221m3/fesub.c b/src/Specific/montgomery64_2e221m3/fesub.c
new file mode 100644
index 000000000..e2f5ff86c
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Computes (x8,x9,x7,x5) - (x14,x15,x13,x11) word by word (x5 and x11 least significant), then adds the limbs of 2^221 - 3 masked by x28.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts at the lowest words with no incoming borrow
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the borrow out of the top word
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz comes from liblow.h; presumably yields 0 when x27 == 0 and all-ones otherwise - confirm
+{ uint64_t x29 = (x28 & 0xfffffffffffffffdL); // lowest limb of 2^221 - 3, i.e. 2^64 - 3
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x1fffffff); // top limb, 2^29 - 1
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top word is discarded
+out[0] = x43; // most-significant word first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least-significant word last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e221m3/fesub.h b/src/Specific/montgomery64_2e221m3/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e221m3/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e222m117/feadd.c b/src/Specific/montgomery64_2e222m117/feadd.c
new file mode 100644
index 000000000..f812c899d
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Adds (x8,x9,x7,x5) and (x14,x15,x13,x11) word by word (x5 and x11 least significant), trial-subtracts 2^222 - 117, and stores one of the two results chosen by the final borrow.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts at the lowest words with no incoming carry
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the top word
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffff8bL, &x29); // lowest limb of 2^222 - 117, i.e. 2^64 - 117
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fffffff, &x38); // top limb, 2^30 - 1
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // the carry x27 acts as a fifth word; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // NOTE(review): cmovznz comes from liblow.h; presumably keeps the subtracted word when x42 == 0 and the plain sum otherwise - confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // most-significant word first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least-significant word last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e222m117/feadd.h b/src/Specific/montgomery64_2e222m117/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e222m117/femul.c b/src/Specific/montgomery64_2e222m117/femul.c
new file mode 100644
index 000000000..052e9c5e4
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Four rounds of multiply-then-reduce of (x8,x9,x7,x5) by (x14,x15,x13,x11) against 2^222 - 117, followed by a trial subtraction; NOTE(review): the shape matches word-by-word Montgomery multiplication, as the directory name suggests - confirm against the generator.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of the second operand; per the Intel intrinsic the return value is the low half and *hi receives the high half
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch partial products: high half of one plus low half of the next
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word: the carry bit x36 is passed as an addend, carry-in is 0
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcfdcfdcfdcfdcfddL, &_); // x41 = low 64 bits of x17 * constant; the high half is discarded
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffff8bL, &x45); // x41 times the limbs of 2^222 - 117, lowest limb (2^64 - 117) first
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fffffff, &x54); // top limb, 2^30 - 1
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // lowest sum word is stored to _ and dropped; only its carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each word of the second operand
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add the round-2 products onto the words kept from round 1
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119); // x81, the carry out of round 1, serves as the top word here
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcfdcfdcfdcfdcfddL, &_);
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffff8bL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fffffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // the two top carries of this round merged into one small word
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each word of the second operand
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcfdcfdcfdcfdcfddL, &_);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffff8bL, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fffffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times each word of the second operand
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcfdcfdcfdcfdcfddL, &_);
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffff8bL, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fffffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284);
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffff8bL, &x329); // trial subtraction of 2^222 - 117 from the four result words plus the carry word x327
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fffffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // only the final borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // NOTE(review): cmovznz comes from liblow.h; presumably keeps the subtracted word when x342 == 0 and the unsubtracted one otherwise - confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // most-significant word first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // least-significant word last
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e222m117/femul.h b/src/Specific/montgomery64_2e222m117/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e222m117/fenz.c b/src/Specific/montgomery64_2e222m117/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of the four input words to out[0].
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8);
+out[0] = x9; // zero exactly when x5, x6, x4 and x2 are all zero
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e222m117/fenz.h b/src/Specific/montgomery64_2e222m117/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e222m117/feopp.c b/src/Specific/montgomery64_2e222m117/feopp.c
new file mode 100644
index 000000000..673d57575
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 - (x5,x6,x4,x2) word by word (x2 least significant), then adds the limbs of 2^222 - 117 masked by x19.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2 with no incoming borrow
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the borrow out of the top word
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz comes from liblow.h; presumably yields 0 when x18 == 0 and all-ones otherwise - confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffff8bL); // lowest limb of 2^222 - 117, i.e. 2^64 - 117
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x3fffffff); // top limb, 2^30 - 1
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top word is discarded
+out[0] = x34; // most-significant word first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least-significant word last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e222m117/feopp.h b/src/Specific/montgomery64_2e222m117/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e222m117/fesub.c b/src/Specific/montgomery64_2e222m117/fesub.c
new file mode 100644
index 000000000..9d08573c6
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Computes (x8,x9,x7,x5) - (x14,x15,x13,x11) word by word (x5 and x11 least significant), then adds the limbs of 2^222 - 117 masked by x28.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts at the lowest words with no incoming borrow
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the borrow out of the top word
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz comes from liblow.h; presumably yields 0 when x27 == 0 and all-ones otherwise - confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffff8bL); // lowest limb of 2^222 - 117, i.e. 2^64 - 117
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x3fffffff); // top limb, 2^30 - 1
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top word is discarded
+out[0] = x43; // most-significant word first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least-significant word last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e222m117/fesub.h b/src/Specific/montgomery64_2e222m117/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e222m117/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e224m2e96p1/feadd.c b/src/Specific/montgomery64_2e224m2e96p1/feadd.c
new file mode 100644
index 000000000..963bba047
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Adds (x8,x9,x7,x5) and (x14,x15,x13,x11) word by word (x5 and x11 least significant), trial-subtracts 2^224 - 2^96 + 1, and stores one of the two results chosen by the final borrow.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts at the lowest words with no incoming carry
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the top word
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0x1, &x29); // lowest limb of 2^224 - 2^96 + 1 is 1
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffff00000000L, &x32); // second limb, 2^64 - 2^32
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffff, &x38); // top limb, 2^32 - 1
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // the carry x27 acts as a fifth word; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // NOTE(review): cmovznz comes from liblow.h; presumably keeps the subtracted word when x42 == 0 and the plain sum otherwise - confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // most-significant word first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least-significant word last
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e224m2e96p1/feadd.h b/src/Specific/montgomery64_2e224m2e96p1/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e224m2e96p1/femul.c b/src/Specific/montgomery64_2e224m2e96p1/femul.c
new file mode 100644
index 000000000..9e0c971d6
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/femul.c
@@ -0,0 +1,132 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Four rounds of multiply-then-reduce of (x8,x9,x7,x5) by (x14,x15,x13,x11) against 2^224 - 2^96 + 1, followed by a trial subtraction; NOTE(review): the shape matches word-by-word Montgomery multiplication, as the directory name suggests - confirm against the generator.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of the second operand; per the Intel intrinsic the return value is the low half and *hi receives the high half
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch partial products: high half of one plus low half of the next
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word: the carry bit x36 is passed as an addend, carry-in is 0
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &_); // x41 = low 64 bits of x17 * constant; the high half is discarded
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffff00000000L, &x45); // x41 times the three upper limbs of 2^224 - 2^96 + 1; the lowest limb is 1, so x41 itself stands in for that product below
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffff, &x51); // top limb, 2^32 - 1
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x45, x47, &x53);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x48, x50, &x56);
+{ uint64_t x59; uint8_t _ = _addcarryx_u64(0x0, x57, x51, &x59);
+{ uint64_t _; uint8_t x63 = _addcarryx_u64(0x0, x17, x41, &_); // lowest sum word is stored to _ and dropped; only its carry x63 is kept
+{ uint64_t x65; uint8_t x66 = _addcarryx_u64(x63, x29, x44, &x65);
+{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x32, x53, &x68);
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x35, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x38, x59, &x74);
+{ uint64_t x78; uint64_t x77 = _mulx_u64(x7, x11, &x78); // round 2: x7 times each word of the second operand
+{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x13, &x81);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x15, &x84);
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x14, &x87);
+{ uint64_t x89; uint8_t x90 = _addcarryx_u64(0x0, x78, x80, &x89);
+{ uint64_t x92; uint8_t x93 = _addcarryx_u64(x90, x81, x83, &x92);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+{ uint64_t x98; uint8_t _ = _addcarryx_u64(0x0, x96, x87, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(0x0, x65, x77, &x101); // add the round-2 products onto the words kept from round 1
+{ uint64_t x104; uint8_t x105 = _addcarryx_u64(x102, x68, x89, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x75, x98, &x113); // x75, the carry out of round 1, serves as the top word here
+{ uint64_t _; uint64_t x116 = _mulx_u64(x101, 0xffffffffffffffffL, &_);
+{ uint64_t x120; uint64_t x119 = _mulx_u64(x116, 0xffffffff00000000L, &x120);
+{ uint64_t x123; uint64_t x122 = _mulx_u64(x116, 0xffffffffffffffffL, &x123);
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x116, 0xffffffff, &x126);
+{ uint64_t x128; uint8_t x129 = _addcarryx_u64(0x0, x120, x122, &x128);
+{ uint64_t x131; uint8_t x132 = _addcarryx_u64(x129, x123, x125, &x131);
+{ uint64_t x134; uint8_t _ = _addcarryx_u64(0x0, x132, x126, &x134);
+{ uint64_t _; uint8_t x138 = _addcarryx_u64(0x0, x101, x116, &_);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x104, x119, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x107, x128, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x110, x131, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x113, x134, &x149);
+{ uint8_t x151 = (x150 + x114); // the two top carries of this round merged into one small word
+{ uint64_t x154; uint64_t x153 = _mulx_u64(x9, x11, &x154); // round 3: x9 times each word of the second operand
+{ uint64_t x157; uint64_t x156 = _mulx_u64(x9, x13, &x157);
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x15, &x160);
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x14, &x163);
+{ uint64_t x165; uint8_t x166 = _addcarryx_u64(0x0, x154, x156, &x165);
+{ uint64_t x168; uint8_t x169 = _addcarryx_u64(x166, x157, x159, &x168);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x160, x162, &x171);
+{ uint64_t x174; uint8_t _ = _addcarryx_u64(0x0, x172, x163, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x140, x153, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x143, x165, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x146, x168, &x183);
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x151, x174, &x189);
+{ uint64_t _; uint64_t x192 = _mulx_u64(x177, 0xffffffffffffffffL, &_);
+{ uint64_t x196; uint64_t x195 = _mulx_u64(x192, 0xffffffff00000000L, &x196);
+{ uint64_t x199; uint64_t x198 = _mulx_u64(x192, 0xffffffffffffffffL, &x199);
+{ uint64_t x202; uint64_t x201 = _mulx_u64(x192, 0xffffffff, &x202);
+{ uint64_t x204; uint8_t x205 = _addcarryx_u64(0x0, x196, x198, &x204);
+{ uint64_t x207; uint8_t x208 = _addcarryx_u64(x205, x199, x201, &x207);
+{ uint64_t x210; uint8_t _ = _addcarryx_u64(0x0, x208, x202, &x210);
+{ uint64_t _; uint8_t x214 = _addcarryx_u64(0x0, x177, x192, &_);
+{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x180, x195, &x216);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(x217, x183, x204, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x186, x207, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x189, x210, &x225);
+{ uint8_t x227 = (x226 + x190);
+{ uint64_t x230; uint64_t x229 = _mulx_u64(x8, x11, &x230); // round 4: x8 times each word of the second operand
+{ uint64_t x233; uint64_t x232 = _mulx_u64(x8, x13, &x233);
+{ uint64_t x236; uint64_t x235 = _mulx_u64(x8, x15, &x236);
+{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x14, &x239);
+{ uint64_t x241; uint8_t x242 = _addcarryx_u64(0x0, x230, x232, &x241);
+{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x233, x235, &x244);
+{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x236, x238, &x247);
+{ uint64_t x250; uint8_t _ = _addcarryx_u64(0x0, x248, x239, &x250);
+{ uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x216, x229, &x253);
+{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x219, x241, &x256);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x222, x244, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x225, x247, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x227, x250, &x265);
+{ uint64_t _; uint64_t x268 = _mulx_u64(x253, 0xffffffffffffffffL, &_);
+{ uint64_t x272; uint64_t x271 = _mulx_u64(x268, 0xffffffff00000000L, &x272);
+{ uint64_t x275; uint64_t x274 = _mulx_u64(x268, 0xffffffffffffffffL, &x275);
+{ uint64_t x278; uint64_t x277 = _mulx_u64(x268, 0xffffffff, &x278);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(0x0, x272, x274, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x275, x277, &x283);
+{ uint64_t x286; uint8_t _ = _addcarryx_u64(0x0, x284, x278, &x286);
+{ uint64_t _; uint8_t x290 = _addcarryx_u64(0x0, x253, x268, &_);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x256, x271, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x259, x280, &x295);
+{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x262, x283, &x298);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x265, x286, &x301);
+{ uint8_t x303 = (x302 + x266);
+{ uint64_t x305; uint8_t x306 = _subborrow_u64(0x0, x292, 0x1, &x305); // trial subtraction of 2^224 - 2^96 + 1 from the four result words plus the carry word x303
+{ uint64_t x308; uint8_t x309 = _subborrow_u64(x306, x295, 0xffffffff00000000L, &x308);
+{ uint64_t x311; uint8_t x312 = _subborrow_u64(x309, x298, 0xffffffffffffffffL, &x311);
+{ uint64_t x314; uint8_t x315 = _subborrow_u64(x312, x301, 0xffffffff, &x314);
+{ uint64_t _; uint8_t x318 = _subborrow_u64(x315, x303, 0x0, &_); // only the final borrow x318 is kept
+{ uint64_t x319 = cmovznz(x318, x314, x301); // NOTE(review): cmovznz comes from liblow.h; presumably keeps the subtracted word when x318 == 0 and the unsubtracted one otherwise - confirm
+{ uint64_t x320 = cmovznz(x318, x311, x298);
+{ uint64_t x321 = cmovznz(x318, x308, x295);
+{ uint64_t x322 = cmovznz(x318, x305, x292);
+out[0] = x319; // most-significant word first
+out[1] = x320;
+out[2] = x321;
+out[3] = x322; // least-significant word last
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e224m2e96p1/femul.h b/src/Specific/montgomery64_2e224m2e96p1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e224m2e96p1/fenz.c b/src/Specific/montgomery64_2e224m2e96p1/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: writes the bitwise OR of the four input words to out[0].
+{ uint64_t x7 = (x6 | x5); // fold x6 and x5 together
+{ uint64_t x8 = (x4 | x7); // fold in x4
+{ uint64_t x9 = (x2 | x8); // fold in x2; x9 == 0 exactly when x5, x6, x4 and x2 are all zero
+out[0] = x9; // the only store through out (one word)
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e224m2e96p1/fenz.h b/src/Specific/montgomery64_2e224m2e96p1/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e224m2e96p1/feopp.c b/src/Specific/montgomery64_2e224m2e96p1/feopp.c
new file mode 100644
index 000000000..ec5edf772
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x5:x6:x4:x2) with x2 as the least-significant 64-bit limb, then adds back a borrow-masked copy of 2^224 - 2^96 + 1.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // limb 0: 0 - x2; x9 = borrow out
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11); // limb 1: 0 - x4 - borrow
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14); // limb 2: 0 - x6 - borrow
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // limb 3: 0 - x5 - borrow; x18 = final borrow
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask derived from the borrow; cmovznz is from liblow.h (not shown) -- presumably 0 when x18 == 0 and all-ones otherwise, TODO confirm
+{ uint8_t x20 = ((uint8_t)x19 & 0x1); // masked limb 0 of 2^224 - 2^96 + 1 (0x1)
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add-back, limb 0
+{ uint64_t x24 = (x19 & 0xffffffff00000000L); // masked limb 1 of the constant
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26); // add-back, limb 1
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL); // masked limb 2 of the constant
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30); // add-back, limb 2
+{ uint64_t x32 = (x19 & 0xffffffff); // masked limb 3 of the constant
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // add-back, limb 3; the carry out is discarded
+out[0] = x34; // limbs are stored most-significant first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least-significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e224m2e96p1/feopp.h b/src/Specific/montgomery64_2e224m2e96p1/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e224m2e96p1/fesub.c b/src/Specific/montgomery64_2e224m2e96p1/fesub.c
new file mode 100644
index 000000000..454d1c390
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtraction: (x8:x9:x7:x5) - (x14:x15:x13:x11), x5 and x11 being the least-significant limbs, followed by a borrow-masked add-back of 2^224 - 2^96 + 1.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // limb 0: x5 - x11; x18 = borrow out
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20); // limb 1: x7 - x13 - borrow
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23); // limb 2: x9 - x15 - borrow
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // limb 3: x8 - x14 - borrow; x27 = final borrow
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask derived from the borrow; cmovznz is from liblow.h (not shown) -- presumably 0 when x27 == 0 and all-ones otherwise, TODO confirm
+{ uint8_t x29 = ((uint8_t)x28 & 0x1); // masked limb 0 of 2^224 - 2^96 + 1 (0x1)
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add-back, limb 0
+{ uint64_t x33 = (x28 & 0xffffffff00000000L); // masked limb 1 of the constant
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35); // add-back, limb 1
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL); // masked limb 2 of the constant
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39); // add-back, limb 2
+{ uint64_t x41 = (x28 & 0xffffffff); // masked limb 3 of the constant
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // add-back, limb 3; the carry out is discarded
+out[0] = x43; // limbs are stored most-significant first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least-significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e224m2e96p1/fesub.h b/src/Specific/montgomery64_2e224m2e96p1/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e224m2e96p1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e226m5/feadd.c b/src/Specific/montgomery64_2e226m5/feadd.c
new file mode 100644
index 000000000..4ab8f6968
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Addition: (x8:x9:x7:x5) + (x14:x15:x13:x11), x5 and x11 being the least-significant limbs, followed by a trial subtraction of 2^226 - 5 and a select between the two results.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // limb 0: x5 + x11; x18 = carry out
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20); // limb 1: x7 + x13 + carry
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23); // limb 2: x9 + x15 + carry
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // limb 3: x8 + x14 + carry; x27 = carry out of the top limb
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffffbL, &x29); // trial subtraction, limb 0 of 2^226 - 5
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32); // trial subtraction, limb 1
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35); // trial subtraction, limb 2
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3ffffffff, &x38); // trial subtraction, limb 3 (2^34 - 1)
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // fold in the carry x27; x42 = borrow out, set when the 5-limb sum is smaller than the constant
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is from liblow.h (not shown) -- presumably picks the subtracted limb when x42 == 0 and the raw sum limb otherwise, TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // limbs are stored most-significant first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least-significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e226m5/feadd.h b/src/Specific/montgomery64_2e226m5/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e226m5/femul.c b/src/Specific/montgomery64_2e226m5/femul.c
new file mode 100644
index 000000000..fb1de9174
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies (x8:x9:x7:x5) by (x14:x15:x13:x11), x5 and x11 being the least-significant limbs, interleaving a word-by-word reduction against 2^226 - 5; presumably Montgomery multiplication (per the directory name) -- TODO confirm the exact result domain against the generator.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each limb of the second operand (x17 = low word, x18 = high word)
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch partial products: high word of one plus low word of the next
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word: last high word plus the carry x36
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xcccccccccccccccdL, &_); // x41 = low 64 bits of x17 * (5^-1 mod 2^64), so that x17 + x41 * 0xfffffffffffffffb == 0 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffffbL, &x45); // x41 times the limbs of 2^226 - 5, least-significant limb first
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3ffffffff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // stitch x41 * constant into limbs x44, x56, x59, x62, x65
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // the low word cancels to zero by choice of x41, so only the carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71); // add the remaining limbs; dropping the zero word shifts the accumulator down one limb
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // x81 = carry out of the top limb
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each limb of the second operand
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // accumulate onto the reduced result of round 1
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119); // the previous top carry x81 enters as the fifth limb
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xcccccccccccccccdL, &_); // reduction factor for round 2, derived from the new low word x107
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffffbL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3ffffffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // low word cancels again; keep only the carry
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // merge the two top carries into the fifth limb for the next round
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each limb of the second operand
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189); // accumulate onto the reduced result of round 2
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xcccccccccccccccdL, &_); // reduction factor for round 3
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffffbL, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3ffffffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_); // low word cancels again; keep only the carry
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202); // merge the two top carries into the fifth limb for the next round
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times each limb of the second operand
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271); // accumulate onto the reduced result of round 3
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xcccccccccccccccdL, &_); // reduction factor for round 4
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffffbL, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3ffffffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_); // low word cancels again; keep only the carry
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // top (fifth) limb of the final accumulator
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffffbL, &x329); // trial subtraction of 2^226 - 5 from (x327:x325:x322:x319:x316)
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3ffffffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // x342 = borrow out, set when the accumulator is smaller than the constant
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz is from liblow.h (not shown) -- presumably picks the subtracted limb when x342 == 0 and the unsubtracted one otherwise, TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // limbs are stored most-significant first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e226m5/femul.h b/src/Specific/montgomery64_2e226m5/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e226m5/fenz.c b/src/Specific/montgomery64_2e226m5/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: writes the bitwise OR of the four input words to out[0].
+{ uint64_t x7 = (x6 | x5); // fold x6 and x5 together
+{ uint64_t x8 = (x4 | x7); // fold in x4
+{ uint64_t x9 = (x2 | x8); // fold in x2; x9 == 0 exactly when x5, x6, x4 and x2 are all zero
+out[0] = x9; // the only store through out (one word)
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e226m5/fenz.h b/src/Specific/montgomery64_2e226m5/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e226m5/feopp.c b/src/Specific/montgomery64_2e226m5/feopp.c
new file mode 100644
index 000000000..edcbefbd9
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x5:x6:x4:x2) with x2 as the least-significant 64-bit limb, then adds back a borrow-masked copy of 2^226 - 5.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // limb 0: 0 - x2; x9 = borrow out
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11); // limb 1: 0 - x4 - borrow
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14); // limb 2: 0 - x6 - borrow
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // limb 3: 0 - x5 - borrow; x18 = final borrow
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask derived from the borrow; cmovznz is from liblow.h (not shown) -- presumably 0 when x18 == 0 and all-ones otherwise, TODO confirm
+{ uint64_t x20 = (x19 & 0xfffffffffffffffbL); // masked limb 0 of 2^226 - 5
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add-back, limb 0
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL); // masked limb 1 of the constant
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26); // add-back, limb 1
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL); // masked limb 2 of the constant
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30); // add-back, limb 2
+{ uint64_t x32 = (x19 & 0x3ffffffff); // masked limb 3 of the constant (2^34 - 1)
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // add-back, limb 3; the carry out is discarded
+out[0] = x34; // limbs are stored most-significant first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least-significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e226m5/feopp.h b/src/Specific/montgomery64_2e226m5/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e226m5/fesub.c b/src/Specific/montgomery64_2e226m5/fesub.c
new file mode 100644
index 000000000..6230b9891
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtraction: (x8:x9:x7:x5) - (x14:x15:x13:x11), x5 and x11 being the least-significant limbs, followed by a borrow-masked add-back of 2^226 - 5.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // limb 0: x5 - x11; x18 = borrow out
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20); // limb 1: x7 - x13 - borrow
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23); // limb 2: x9 - x15 - borrow
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // limb 3: x8 - x14 - borrow; x27 = final borrow
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask derived from the borrow; cmovznz is from liblow.h (not shown) -- presumably 0 when x27 == 0 and all-ones otherwise, TODO confirm
+{ uint64_t x29 = (x28 & 0xfffffffffffffffbL); // masked limb 0 of 2^226 - 5
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add-back, limb 0
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL); // masked limb 1 of the constant
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35); // add-back, limb 1
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL); // masked limb 2 of the constant
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39); // add-back, limb 2
+{ uint64_t x41 = (x28 & 0x3ffffffff); // masked limb 3 of the constant (2^34 - 1)
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // add-back, limb 3; the carry out is discarded
+out[0] = x43; // limbs are stored most-significant first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least-significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e226m5/fesub.h b/src/Specific/montgomery64_2e226m5/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e226m5/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e230m27/feadd.c b/src/Specific/montgomery64_2e230m27/feadd.c
new file mode 100644
index 000000000..b4afa6349
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Addition: (x8:x9:x7:x5) + (x14:x15:x13:x11), x5 and x11 being the least-significant limbs, followed by a trial subtraction of 2^230 - 27 and a select between the two results.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // limb 0: x5 + x11; x18 = carry out
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20); // limb 1: x7 + x13 + carry
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23); // limb 2: x9 + x15 + carry
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // limb 3: x8 + x14 + carry; x27 = carry out of the top limb
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffe5L, &x29); // trial subtraction, limb 0 of 2^230 - 27
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32); // trial subtraction, limb 1
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35); // trial subtraction, limb 2
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3fffffffff, &x38); // trial subtraction, limb 3 (2^38 - 1)
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // fold in the carry x27; x42 = borrow out, set when the 5-limb sum is smaller than the constant
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is from liblow.h (not shown) -- presumably picks the subtracted limb when x42 == 0 and the raw sum limb otherwise, TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // limbs are stored most-significant first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least-significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e230m27/feadd.h b/src/Specific/montgomery64_2e230m27/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e230m27/femul.c b/src/Specific/montgomery64_2e230m27/femul.c
new file mode 100644
index 000000000..bde43f07c
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies (x8:x9:x7:x5) by (x14:x15:x13:x11), x5 and x11 being the least-significant limbs, interleaving a word-by-word reduction against 2^230 - 27; presumably Montgomery multiplication (per the directory name) -- TODO confirm the exact result domain against the generator.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each limb of the second operand (x17 = low word, x18 = high word)
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch partial products: high word of one plus low word of the next
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word: last high word plus the carry x36
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x84bda12f684bda13L, &_); // x41 = low 64 bits of x17 * (27^-1 mod 2^64), so that x17 + x41 * 0xffffffffffffffe5 == 0 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffe5L, &x45); // x41 times the limbs of 2^230 - 27, least-significant limb first
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x3fffffffff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // stitch x41 * constant into limbs x44, x56, x59, x62, x65
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // the low word cancels to zero by choice of x41, so only the carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71); // add the remaining limbs; dropping the zero word shifts the accumulator down one limb
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // x81 = carry out of the top limb
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each limb of the second operand
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // accumulate onto the reduced result of round 1
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119); // the previous top carry x81 enters as the fifth limb
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x84bda12f684bda13L, &_); // reduction factor for round 2, derived from the new low word x107
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffe5L, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x3fffffffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // low word cancels again; keep only the carry
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // merge the two top carries into the fifth limb for the next round
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each limb of the second operand
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189); // accumulate onto the reduced result of round 2
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x84bda12f684bda13L, &_); // reduction factor for round 3
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffe5L, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x3fffffffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_); // low word cancels again; keep only the carry
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202); // merge the two top carries into the fifth limb for the next round
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times each limb of the second operand
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271); // accumulate onto the reduced result of round 3
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x84bda12f684bda13L, &_); // reduction factor for round 4
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffe5L, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x3fffffffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_); // low word cancels again; keep only the carry
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // top (fifth) limb of the final accumulator
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffe5L, &x329); // trial subtraction of 2^230 - 27 from (x327:x325:x322:x319:x316)
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x3fffffffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // x342 = borrow out, set when the accumulator is smaller than the constant
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz is from liblow.h (not shown) -- presumably picks the subtracted limb when x342 == 0 and the unsubtracted one otherwise, TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // limbs are stored most-significant first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e230m27/femul.h b/src/Specific/montgomery64_2e230m27/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype only; body presumably in this directory's femul.c (TODO confirm), where a note says the caller supplies uint64_t out[4]
diff --git a/src/Specific/montgomery64_2e230m27/fenz.c b/src/Specific/montgomery64_2e230m27/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the four input words together and stores the result in out[0]
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8); // x9 is 0 exactly when x2, x4, x5 and x6 are all 0
+out[0] = x9; // the only element of out that is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e230m27/fenz.h b/src/Specific/montgomery64_2e230m27/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only; body is in fenz.c, which notes the caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery64_2e230m27/feopp.c b/src/Specific/montgomery64_2e230m27/feopp.c
new file mode 100644
index 000000000..8dc2603ae
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // computes 0 - (x5:x6:x4:x2) word by word (x2 is the least significant word), then adds a masked constant
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2; each borrow-out feeds the next word
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the final borrow of the 4-word subtraction
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz comes from liblow.h (not shown); presumably yields all-ones when x18 != 0 and 0 otherwise - TODO confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffffe5L); // the four masked constants are the words of 2^230 - 27, low word first
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // carry chain adds the masked constant to the difference
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x3fffffffff);
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // final carry-out is discarded into _
+out[0] = x34; // most significant word first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least significant word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e230m27/feopp.h b/src/Specific/montgomery64_2e230m27/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only; body is in feopp.c, which notes the caller supplies uint64_t out[4]
diff --git a/src/Specific/montgomery64_2e230m27/fesub.c b/src/Specific/montgomery64_2e230m27/fesub.c
new file mode 100644
index 000000000..f593c02cc
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // computes (x8:x9:x7:x5) - (x14:x15:x13:x11) word by word (x5, x11 least significant), then adds a masked constant
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts at the x5/x11 pair; each borrow-out feeds the next word
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the final borrow of the 4-word subtraction
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz comes from liblow.h (not shown); presumably yields all-ones when x27 != 0 and 0 otherwise - TODO confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffffe5L); // the four masked constants are the words of 2^230 - 27, low word first
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // carry chain adds the masked constant to the difference
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x3fffffffff);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // final carry-out is discarded into _
+out[0] = x43; // most significant word first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least significant word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e230m27/fesub.h b/src/Specific/montgomery64_2e230m27/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e230m27/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype only; body is in fesub.c, which notes the caller supplies uint64_t out[4]
diff --git a/src/Specific/montgomery64_2e235m15/feadd.c b/src/Specific/montgomery64_2e235m15/feadd.c
new file mode 100644
index 000000000..eae9f264a
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // adds (x8:x9:x7:x5) and (x14:x15:x13:x11) word by word (x5, x11 least significant), then selects between the sum and the sum minus a constant
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts at the x5/x11 pair; each carry-out feeds the next word
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the top word
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff1L, &x29); // trial subtraction; the four constants are the words of 2^235 - 15, low word first
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffff, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // the carry x27 acts as a fifth word; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz comes from liblow.h (not shown); presumably picks the 3rd argument (plain sum) when x42 != 0, else the 2nd (reduced sum) - TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // most significant word first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least significant word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e235m15/feadd.h b/src/Specific/montgomery64_2e235m15/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype only; body is in feadd.c, which notes the caller supplies uint64_t out[4]
diff --git a/src/Specific/montgomery64_2e235m15/femul.c b/src/Specific/montgomery64_2e235m15/femul.c
new file mode 100644
index 000000000..007baab53
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // four rounds (x5, x7, x9, x8): multiply one word by (x14:x15:x13:x11), accumulate, add a multiple of 2^235 - 15 that clears the low word, drop that word
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of the second operand (return value = low half, pointer = high half)
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch the partial products into the 5-word value x17:x29:x32:x35:x38 (low word first)
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xeeeeeeeeeeeeeeefL, &_); // x41 = low 64 bits of x17 * 0xeeeeeeeeeeeeeeef, and 15 * 0xeeeeeeeeeeeeeeef == 1 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff1L, &x45); // x41 times the words of 2^235 - 15 (0xfffffffffffffff1 = 2^64 - 15 is the low word)
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // stitch into the 5-word value x44:x56:x59:x62:x65
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // x17 + x44 is 0 mod 2^64 by the choice of x41, so the word goes to _ and only the carry is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // accumulator after round 1: x71:x74:x77:x80 plus carry x81
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: same steps with x7
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add the new product into the accumulator
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor for this round, derived from the accumulator's low word
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff1L, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // low word cancels again and is dropped
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // sum of the two top carry-outs of this round
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: same steps with x9
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xeeeeeeeeeeeeeeefL, &_);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff1L, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: same steps with x8
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xeeeeeeeeeeeeeeefL, &_);
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff1L, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // top word of the accumulator after the last round
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff1L, &x329); // trial subtraction of 2^235 - 15 from x316:x319:x322:x325 with x327 on top
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // only the final borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz comes from liblow.h (not shown); presumably picks the 3rd argument (unsubtracted) when x342 != 0, else the 2nd - TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // most significant word first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // least significant word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e235m15/femul.h b/src/Specific/montgomery64_2e235m15/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype only; body is in femul.c, which notes the caller supplies uint64_t out[4]
diff --git a/src/Specific/montgomery64_2e235m15/fenz.c b/src/Specific/montgomery64_2e235m15/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the four input words together and stores the result in out[0]
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8); // x9 is 0 exactly when x2, x4, x5 and x6 are all 0
+out[0] = x9; // the only element of out that is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e235m15/fenz.h b/src/Specific/montgomery64_2e235m15/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only; body is in fenz.c, which notes the caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery64_2e235m15/feopp.c b/src/Specific/montgomery64_2e235m15/feopp.c
new file mode 100644
index 000000000..cc6fc1a86
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // computes 0 - (x5:x6:x4:x2) word by word (x2 is the least significant word), then adds a masked constant
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2; each borrow-out feeds the next word
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 is the final borrow of the 4-word subtraction
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz comes from liblow.h (not shown); presumably yields all-ones when x18 != 0 and 0 otherwise - TODO confirm
+{ uint64_t x20 = (x19 & 0xfffffffffffffff1L); // the four masked constants are the words of 2^235 - 15, low word first
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // carry chain adds the masked constant to the difference
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x7ffffffffff);
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // final carry-out is discarded into _
+out[0] = x34; // most significant word first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least significant word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e235m15/feopp.h b/src/Specific/montgomery64_2e235m15/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only; body is in feopp.c, which notes the caller supplies uint64_t out[4]
diff --git a/src/Specific/montgomery64_2e235m15/fesub.c b/src/Specific/montgomery64_2e235m15/fesub.c
new file mode 100644
index 000000000..c160c373d
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // computes (x8:x9:x7:x5) - (x14:x15:x13:x11) word by word (x5, x11 least significant), then adds a masked constant
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts at the x5/x11 pair; each borrow-out feeds the next word
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 is the final borrow of the 4-word subtraction
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz comes from liblow.h (not shown); presumably yields all-ones when x27 != 0 and 0 otherwise - TODO confirm
+{ uint64_t x29 = (x28 & 0xfffffffffffffff1L); // the four masked constants are the words of 2^235 - 15, low word first
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // carry chain adds the masked constant to the difference
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x7ffffffffff);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // final carry-out is discarded into _
+out[0] = x43; // most significant word first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least significant word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e235m15/fesub.h b/src/Specific/montgomery64_2e235m15/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e235m15/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype only; body is in fesub.c, which notes the caller supplies uint64_t out[4]
diff --git a/src/Specific/montgomery64_2e243m9/feadd.c b/src/Specific/montgomery64_2e243m9/feadd.c
new file mode 100644
index 000000000..82067f831
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // adds (x8:x9:x7:x5) and (x14:x15:x13:x11) word by word (x5, x11 least significant), then selects between the sum and the sum minus a constant
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts at the x5/x11 pair; each carry-out feeds the next word
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 is the carry out of the top word
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff7L, &x29); // trial subtraction; the four constants are the words of 2^243 - 9, low word first
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffffff, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // the carry x27 acts as a fifth word; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz comes from liblow.h (not shown); presumably picks the 3rd argument (plain sum) when x42 != 0, else the 2nd (reduced sum) - TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // most significant word first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least significant word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e243m9/feadd.h b/src/Specific/montgomery64_2e243m9/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype only; body is in feadd.c, which notes the caller supplies uint64_t out[4]
diff --git a/src/Specific/montgomery64_2e243m9/femul.c b/src/Specific/montgomery64_2e243m9/femul.c
new file mode 100644
index 000000000..f6ed60fc9
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // four rounds (x5, x7, x9, x8): multiply one word by (x14:x15:x13:x11), accumulate, add a multiple of 2^243 - 9 that clears the low word, drop that word
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of the second operand (return value = low half, pointer = high half)
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch the partial products into the 5-word value x17:x29:x32:x35:x38 (low word first)
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x8e38e38e38e38e39L, &_); // x41 = low 64 bits of x17 * 0x8e38e38e38e38e39, and 9 * 0x8e38e38e38e38e39 == 1 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff7L, &x45); // x41 times the words of 2^243 - 9 (0xfffffffffffffff7 = 2^64 - 9 is the low word)
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffffff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56); // stitch into the 5-word value x44:x56:x59:x62:x65
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // x17 + x44 is 0 mod 2^64 by the choice of x41, so the word goes to _ and only the carry is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // accumulator after round 1: x71:x74:x77:x80 plus carry x81
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: same steps with x7
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add the new product into the accumulator
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x8e38e38e38e38e39L, &_); // reduction factor for this round, derived from the accumulator's low word
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff7L, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // low word cancels again and is dropped
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // sum of the two top carry-outs of this round
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: same steps with x9
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x8e38e38e38e38e39L, &_);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff7L, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: same steps with x8
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x8e38e38e38e38e39L, &_);
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff7L, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // top word of the accumulator after the last round
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff7L, &x329); // trial subtraction of 2^243 - 9 from x316:x319:x322:x325 with x327 on top
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // only the final borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz comes from liblow.h (not shown); presumably picks the 3rd argument (unsubtracted) when x342 != 0, else the 2nd - TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // most significant word first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // least significant word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e243m9/femul.h b/src/Specific/montgomery64_2e243m9/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only (body presumably in the sibling femul.c -- confirm). NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e243m9/fenz.c b/src/Specific/montgomery64_2e243m9/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores the bitwise OR of the four input words in out[0]; the stored value is 0 iff every input word is 0.
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8); // x9 == x2 | x4 | x6 | x5
+out[0] = x9; // not normalized to 0/1: any non-zero value means "some input word was non-zero"
+}}}
+// caller: uint64_t out[1]; (only out[0] is written)
diff --git a/src/Specific/montgomery64_2e243m9/fenz.h b/src/Specific/montgomery64_2e243m9/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the body is in fenz.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e243m9/feopp.c b/src/Specific/montgomery64_2e243m9/feopp.c
new file mode 100644
index 000000000..7fcf38323
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Negation of a 4-word value (x2 = least significant word, x5 = most significant): computes 0 - x and, when that borrowed, adds the modulus back.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // 0 - x2 on the lowest word; x9 = borrow out
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = final borrow, set exactly when the input was non-zero
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask: presumably 0 when x18 == 0 and all ones otherwise (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x20 = (x19 & 0xfffffffffffffff7L); // masked modulus words follow, lowest first; unmasked they encode 2^243 - 9
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x7ffffffffffff);
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top word is discarded
+out[0] = x34; // output is stored most significant word first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4]; (out[0..3] are all written)
diff --git a/src/Specific/montgomery64_2e243m9/feopp.h b/src/Specific/montgomery64_2e243m9/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the body is in feopp.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e243m9/fesub.c b/src/Specific/montgomery64_2e243m9/fesub.c
new file mode 100644
index 000000000..e02f6adfa
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtraction of two 4-word values: a = (x5 lowest .. x8 highest) minus b = (x11 lowest .. x14 highest); the modulus is added back when the subtraction borrowed.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // lowest words first; each borrow feeds the next word
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = final borrow, i.e. a < b
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask: presumably 0 when x27 == 0 and all ones otherwise (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x29 = (x28 & 0xfffffffffffffff7L); // masked modulus words follow, lowest first; unmasked they encode 2^243 - 9
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x7ffffffffffff);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top word is discarded
+out[0] = x43; // output is stored most significant word first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4]; (out[0..3] are all written)
diff --git a/src/Specific/montgomery64_2e243m9/fesub.h b/src/Specific/montgomery64_2e243m9/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e243m9/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the body is in fesub.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e251m9/feadd.c b/src/Specific/montgomery64_2e251m9/feadd.c
new file mode 100644
index 000000000..e6013e94a
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Addition of two 4-word values: a = (x5 lowest .. x8 highest) plus b = (x11 lowest .. x14 highest), followed by one conditional subtraction of the modulus.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // lowest words first; each carry feeds the next word
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the top word
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffff7L, &x29); // trial subtraction of the modulus; its words (lowest first) encode 2^251 - 9
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7ffffffffffffff, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extend the subtraction through the carry bit x27; x42 = 1 means the sum was below the modulus
+{ uint64_t x43 = cmovznz(x42, x38, x26); // presumably picks the subtracted word when x42 == 0 and the plain sum otherwise (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // output is stored most significant word first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4]; (out[0..3] are all written)
diff --git a/src/Specific/montgomery64_2e251m9/feadd.h b/src/Specific/montgomery64_2e251m9/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the body is in feadd.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e251m9/femul.c b/src/Specific/montgomery64_2e251m9/femul.c
new file mode 100644
index 000000000..c1a768f4d
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Word-by-word multiply-and-reduce of a = (x5 lowest .. x8 highest) by b = (x11 lowest .. x14 highest); presumably Montgomery multiplication modulo 2^251 - 9 (per the directory name -- confirm). Four rounds, one per word of a, then one conditional subtraction.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of b (low half returned, high half stored through the pointer)
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch the overlapping high/low halves together
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // fold the last carry into the top word: (x38,x35,x32,x29,x17) = x5 * b
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x8e38e38e38e38e39L, &_); // reduction multiplier: low 64 bits only; 9 * 0x8e38e38e38e38e39 == 1 mod 2^64 and the lowest modulus word is -9 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffff7L, &x45); // x41 times the modulus words (lowest first), which encode 2^251 - 9
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7ffffffffffffff, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // add x41 * modulus; the lowest sum word is thrown away and only its carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80); // x81 = carry out of the top word, carried into round 2
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each word of b
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // accumulate the new partial product onto round 1's result
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x8e38e38e38e38e39L, &_); // reduction multiplier for round 2
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffff7L, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7ffffffffffffff, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // lowest sum word dropped again
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // combine the carries out of the accumulate chain and the reduce chain
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each word of b
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x8e38e38e38e38e39L, &_); // reduction multiplier for round 3
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffff7L, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7ffffffffffffff, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times each word of b
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x8e38e38e38e38e39L, &_); // reduction multiplier for round 4
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffff7L, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7ffffffffffffff, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // extra carry word on top of (x325,x322,x319,x316)
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffff7L, &x329); // trial subtraction of the modulus from the round-4 result
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7ffffffffffffff, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // x342 = final borrow, taking the extra carry word x327 into account
+{ uint64_t x343 = cmovznz(x342, x338, x325); // presumably picks the subtracted word when x342 == 0 and the unsubtracted one otherwise (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // output is stored most significant word first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4]; (out[0..3] are all written)
diff --git a/src/Specific/montgomery64_2e251m9/femul.h b/src/Specific/montgomery64_2e251m9/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the body is in femul.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e251m9/fenz.c b/src/Specific/montgomery64_2e251m9/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores the bitwise OR of the four input words in out[0]; the stored value is 0 iff every input word is 0.
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8); // x9 == x2 | x4 | x6 | x5
+out[0] = x9; // not normalized to 0/1: any non-zero value means "some input word was non-zero"
+}}}
+// caller: uint64_t out[1]; (only out[0] is written)
diff --git a/src/Specific/montgomery64_2e251m9/fenz.h b/src/Specific/montgomery64_2e251m9/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the body is in fenz.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e251m9/feopp.c b/src/Specific/montgomery64_2e251m9/feopp.c
new file mode 100644
index 000000000..b01942c17
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Negation of a 4-word value (x2 = least significant word, x5 = most significant): computes 0 - x and, when that borrowed, adds the modulus back.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // 0 - x2 on the lowest word; x9 = borrow out
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = final borrow, set exactly when the input was non-zero
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // mask: presumably 0 when x18 == 0 and all ones otherwise (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x20 = (x19 & 0xfffffffffffffff7L); // masked modulus words follow, lowest first; unmasked they encode 2^251 - 9
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x7ffffffffffffff);
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top word is discarded
+out[0] = x34; // output is stored most significant word first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4]; (out[0..3] are all written)
diff --git a/src/Specific/montgomery64_2e251m9/feopp.h b/src/Specific/montgomery64_2e251m9/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; the body is in feopp.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e251m9/fesub.c b/src/Specific/montgomery64_2e251m9/fesub.c
new file mode 100644
index 000000000..450aeb7aa
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtraction of two 4-word values: a = (x5 lowest .. x8 highest) minus b = (x11 lowest .. x14 highest); the modulus is added back when the subtraction borrowed.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // lowest words first; each borrow feeds the next word
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = final borrow, i.e. a < b
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // mask: presumably 0 when x27 == 0 and all ones otherwise (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x29 = (x28 & 0xfffffffffffffff7L); // masked modulus words follow, lowest first; unmasked they encode 2^251 - 9
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x7ffffffffffffff);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top word is discarded
+out[0] = x43; // output is stored most significant word first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4]; (out[0..3] are all written)
diff --git a/src/Specific/montgomery64_2e251m9/fesub.h b/src/Specific/montgomery64_2e251m9/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e251m9/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the body is in fesub.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/feadd.c b/src/Specific/montgomery64_2e254m127x2e240m1/feadd.c
new file mode 100644
index 000000000..d080f0462
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Addition of two 4-word values: a = (x5 lowest .. x8 highest) plus b = (x11 lowest .. x14 highest), followed by one conditional subtraction of the modulus.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // lowest words first; each carry feeds the next word
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the top word
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29); // trial subtraction of the modulus; its words (lowest first) encode 2^254 - 127*2^240 - 1
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x3f80ffffffffffff, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extend the subtraction through the carry bit x27; x42 = 1 means the sum was below the modulus
+{ uint64_t x43 = cmovznz(x42, x38, x26); // presumably picks the subtracted word when x42 == 0 and the plain sum otherwise (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // output is stored most significant word first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4]; (out[0..3] are all written)
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/feadd.h b/src/Specific/montgomery64_2e254m127x2e240m1/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Prototype only; the body is in feadd.c. NOTE(review): force_inline is __attribute__((always_inline)), yet no body is visible from this header, so callers in other translation units cannot actually inline it -- confirm how this header is meant to be consumed.
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/femul.c b/src/Specific/montgomery64_2e254m127x2e240m1/femul.c
new file mode 100644
index 000000000..2793eb2b1
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/femul.c
@@ -0,0 +1,136 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Word-by-word multiply-and-reduce of a = (x5 lowest .. x8 highest) by b = (x11 lowest .. x14 highest); presumably Montgomery multiplication modulo 2^254 - 127*2^240 - 1 (per the directory name -- confirm). Four rounds, one per word of a, then one conditional subtraction.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of b (low half returned, high half stored through the pointer)
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch the overlapping high/low halves together
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // fold the last carry into the top word: (x38,x35,x32,x29,x17) = x5 * b
+{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42); // reduction: the lowest accumulator word x17 is itself the multiplier; x17 times the modulus words (lowest first), which encode 2^254 - 127*2^240 - 1
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0x3f80ffffffffffff, &x51);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
+{ uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_); // add x17 * modulus; the lowest sum word is thrown away and only its carry x66 is kept
+{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77); // x78 = carry out of the top word, carried into round 2
+{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81); // round 2: x7 times each word of b
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
+{ uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
+{ uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104); // accumulate the new partial product onto round 1's result
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
+{ uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120); // reduction for round 2: x104 times the modulus words
+{ uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0x3f80ffffffffffff, &x129);
+{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
+{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
+{ uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
+{ uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_); // lowest sum word dropped again
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
+{ uint8_t x157 = (x156 + x117); // combine the carries out of the accumulate chain and the reduce chain
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160); // round 3: x9 times each word of b
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
+{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
+{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183);
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
+{ uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199); // reduction for round 3: x183 times the modulus words
+{ uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
+{ uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0x3f80ffffffffffff, &x208);
+{ uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
+{ uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
+{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
+{ uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
+{ uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
+{ uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
+{ uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
+{ uint8_t x236 = (x235 + x196);
+{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239); // round 4: x8 times each word of b
+{ uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
+{ uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
+{ uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
+{ uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
+{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+{ uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
+{ uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
+{ uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278); // reduction for round 4: x262 times the modulus words
+{ uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
+{ uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
+{ uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0x3f80ffffffffffff, &x287);
+{ uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
+{ uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
+{ uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
+{ uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
+{ uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
+{ uint8_t x315 = (x314 + x275); // extra carry word on top of (x313,x310,x307,x304)
+{ uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317); // trial subtraction of the modulus from the round-4 result
+{ uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
+{ uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
+{ uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0x3f80ffffffffffff, &x326);
+{ uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_); // x330 = final borrow, taking the extra carry word x315 into account
+{ uint64_t x331 = cmovznz(x330, x326, x313); // presumably picks the subtracted word when x330 == 0 and the unsubtracted one otherwise (cmovznz comes from liblow.h -- confirm)
+{ uint64_t x332 = cmovznz(x330, x323, x310);
+{ uint64_t x333 = cmovznz(x330, x320, x307);
+{ uint64_t x334 = cmovznz(x330, x317, x304);
+out[0] = x331; // output is stored most significant word first
+out[1] = x332;
+out[2] = x333;
+out[3] = x334;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4]; (out[0..3] are all written)
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/femul.h b/src/Specific/montgomery64_2e254m127x2e240m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/fenz.c b/src/Specific/montgomery64_2e254m127x2e240m1/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: out[0] = x5 | x6 | x4 | x2, so out[0] is zero exactly when all four input words are zero
+{ uint64_t x7 = (x6 | x5); // fold the first two words together
+{ uint64_t x8 = (x4 | x7); // then the third
+{ uint64_t x9 = (x2 | x8); // then the fourth; x9 has a bit set wherever any input word does
+out[0] = x9; // only out[0] is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/fenz.h b/src/Specific/montgomery64_2e254m127x2e240m1/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/feopp.c b/src/Specific/montgomery64_2e254m127x2e240m1/feopp.c
new file mode 100644
index 000000000..4114a0200
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // feopp: computes 0 - (x5:x6:x4:x2) word by word (x2 is the lowest word), then adds masked modulus words back
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // lowest word: 0 - x2, borrow-out in x9
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11); // each later step consumes the previous borrow
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18: borrow out of the top word
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz is declared in liblow.h (not shown); presumably x19 = 0 when x18 == 0 and all-ones otherwise - TODO confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffffffL); // x20, x24, x28, x32: x19 ANDed with the constant words, lowest first
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant back, lowest word first
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x3f80ffffffffffff); // top constant word; the four words together equal 2^254 - 127*2^240 - 1
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the final carry-out is discarded into `_`
+out[0] = x34; // result is stored highest word first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // lowest word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/feopp.h b/src/Specific/montgomery64_2e254m127x2e240m1/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/fesub.c b/src/Specific/montgomery64_2e254m127x2e240m1/fesub.c
new file mode 100644
index 000000000..b2a839023
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // fesub: computes (x8:x9:x7:x5) - (x14:x15:x13:x11) word by word (x5/x11 are the lowest words), then adds masked modulus words back
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // lowest words first; borrow-out in x18
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20); // each later step consumes the previous borrow
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27: borrow out of the top word
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz is declared in liblow.h (not shown); presumably x28 = 0 when x27 == 0 and all-ones otherwise - TODO confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffffffL); // x29, x33, x37, x41: x28 ANDed with the constant words, lowest first
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant back, lowest word first
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x3f80ffffffffffff); // top constant word; the four words together equal 2^254 - 127*2^240 - 1
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the final carry-out is discarded into `_`
+out[0] = x43; // result is stored highest word first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // lowest word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e254m127x2e240m1/fesub.h b/src/Specific/montgomery64_2e254m127x2e240m1/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e254m127x2e240m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e255m19/feadd.c b/src/Specific/montgomery64_2e255m19/feadd.c
new file mode 100644
index 000000000..82ea51b0d
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // feadd: adds (x8:x9:x7:x5) + (x14:x15:x13:x11) word by word (x5/x11 are the lowest words), trial-subtracts a constant, and keeps one of the two results
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // lowest words first; carry-out in x18
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20); // each later step consumes the previous carry
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27: carry out of the top word
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffedL, &x29); // trial subtraction, lowest word first; the four constant words together equal 2^255 - 19
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // runs the borrow through the addition's carry x27; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is declared in liblow.h (not shown); presumably picks the subtracted word when x42 == 0 and the unsubtracted one otherwise - TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // result is stored highest word first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // lowest word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m19/feadd.h b/src/Specific/montgomery64_2e255m19/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e255m19/femul.c b/src/Specific/montgomery64_2e255m19/femul.c
new file mode 100644
index 000000000..eac0818f5
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // femul: four rounds, one per word of the first operand (x5, x7, x9, x8 in that order); each round adds that word times (x14:x15:x13:x11) to a running value and then cancels the running value's lowest word; ends with a trial subtraction and select
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of the second operand; low half returned, high half written through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch the partial products: each high half is added to the next low half
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word = last carry + last high half; carry-out discarded into `_`
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x86bca1af286bca1bL, &_); // x41 = low 64 bits of x17 * constant (high half discarded); 19 * 0x86bca1af286bca1b == 1 mod 2^64, so x17 + x41 * 0xffffffffffffffed == 0 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffedL, &x45); // x41 times the four constant words (lowest first); together they equal 2^255 - 19
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // the lowest word sums to zero mod 2^64 and is discarded; only its carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71); // remaining words become the running value x71, x74, x77, x80 with carry x81
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: same pattern with x7
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add this round's product to the running value
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x86bca1af286bca1bL, &_); // multiplier that cancels the lowest word x107, as in round 1
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffedL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // lowest word discarded again; carry x150 kept
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // merge the two carries into one extra top word for the next round
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: same pattern with x9
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x86bca1af286bca1bL, &_);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffedL, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: same pattern with x8
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x86bca1af286bca1bL, &_);
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffedL, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // extra top word of the final running value
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffedL, &x329); // trial subtraction of the same four constant words, lowest first
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // runs the borrow through the extra top word; only the final borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz is declared in liblow.h (not shown); presumably picks the subtracted word when x342 == 0 and the unsubtracted one otherwise - TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // result is stored highest word first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // lowest word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m19/femul.h b/src/Specific/montgomery64_2e255m19/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e255m19/fenz.c b/src/Specific/montgomery64_2e255m19/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // fenz: out[0] = x5 | x6 | x4 | x2, so out[0] is zero exactly when all four input words are zero
+{ uint64_t x7 = (x6 | x5); // fold the first two words together
+{ uint64_t x8 = (x4 | x7); // then the third
+{ uint64_t x9 = (x2 | x8); // then the fourth; x9 has a bit set wherever any input word does
+out[0] = x9; // only out[0] is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e255m19/fenz.h b/src/Specific/montgomery64_2e255m19/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e255m19/feopp.c b/src/Specific/montgomery64_2e255m19/feopp.c
new file mode 100644
index 000000000..d47a8efe4
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // feopp: computes 0 - (x5:x6:x4:x2) word by word (x2 is the lowest word), then adds masked modulus words back
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // lowest word: 0 - x2, borrow-out in x9
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11); // each later step consumes the previous borrow
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18: borrow out of the top word
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz is declared in liblow.h (not shown); presumably x19 = 0 when x18 == 0 and all-ones otherwise - TODO confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffffedL); // x20, x24, x28, x32: x19 ANDed with the constant words, lowest first; together the constants equal 2^255 - 19
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant back, lowest word first
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x7fffffffffffffffL); // top constant word
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the final carry-out is discarded into `_`
+out[0] = x34; // result is stored highest word first
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // lowest word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m19/feopp.h b/src/Specific/montgomery64_2e255m19/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e255m19/fesub.c b/src/Specific/montgomery64_2e255m19/fesub.c
new file mode 100644
index 000000000..0f8d12797
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // fesub: computes (x8:x9:x7:x5) - (x14:x15:x13:x11) word by word (x5/x11 are the lowest words), then adds masked modulus words back
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // lowest words first; borrow-out in x18
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20); // each later step consumes the previous borrow
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27: borrow out of the top word
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz is declared in liblow.h (not shown); presumably x28 = 0 when x27 == 0 and all-ones otherwise - TODO confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffffedL); // x29, x33, x37, x41: x28 ANDed with the constant words, lowest first; together the constants equal 2^255 - 19
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant back, lowest word first
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x7fffffffffffffffL); // top constant word
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the final carry-out is discarded into `_`
+out[0] = x43; // result is stored highest word first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // lowest word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m19/fesub.h b/src/Specific/montgomery64_2e255m19/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m19/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c
new file mode 100644
index 000000000..82ea51b0d
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // feadd: adds (x8:x9:x7:x5) + (x14:x15:x13:x11) word by word (x5/x11 are the lowest words), trial-subtracts a constant, and keeps one of the two results
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // lowest words first; carry-out in x18
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20); // each later step consumes the previous carry
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27: carry out of the top word
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffedL, &x29); // trial subtraction, lowest word first; the four constant words together equal 2^255 - 19 (= 2^255 - 2^4 - 2^1 - 1)
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // runs the borrow through the addition's carry x27; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is declared in liblow.h (not shown); presumably picks the subtracted word when x42 == 0 and the unsubtracted one otherwise - TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // result is stored highest word first
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // lowest word
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.h b/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c
new file mode 100644
index 000000000..eac0818f5
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // femul: four rounds, one per word of the first operand (x5, x7, x9, x8 in that order); each round adds that word times (x14:x15:x13:x11) to a running value and then cancels the running value's lowest word; ends with a trial subtraction and select
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of the second operand; low half returned, high half written through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // stitch the partial products: each high half is added to the next low half
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // top word = last carry + last high half; carry-out discarded into `_`
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0x86bca1af286bca1bL, &_); // x41 = low 64 bits of x17 * constant (high half discarded); 19 * 0x86bca1af286bca1b == 1 mod 2^64, so x17 + x41 * 0xffffffffffffffed == 0 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffffedL, &x45); // x41 times the four constant words (lowest first); together they equal 2^255 - 19 (= 2^255 - 2^4 - 2^1 - 1)
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // the lowest word sums to zero mod 2^64 and is discarded; only its carry x69 is kept
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71); // remaining words become the running value x71, x74, x77, x80 with carry x81
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: same pattern with x7
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add this round's product to the running value
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0x86bca1af286bca1bL, &_); // multiplier that cancels the lowest word x107, as in round 1
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffffedL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_); // lowest word discarded again; carry x150 kept
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // merge the two carries into one extra top word for the next round
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: same pattern with x9
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0x86bca1af286bca1bL, &_);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffffedL, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: same pattern with x8
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0x86bca1af286bca1bL, &_);
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffffedL, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284); // extra top word of the final running value
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffffedL, &x329); // trial subtraction of the same four constant words, lowest first
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // runs the borrow through the extra top word; only the final borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz is declared in liblow.h (not shown); presumably picks the subtracted word when x342 == 0 and the unsubtracted one otherwise - TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // result is stored highest word first
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // lowest word
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.h b/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype for femul.c in this directory; that definition stores out[0..3], so out needs room for uint64_t[4]
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the four input words into out[0]; the stored word is 0 exactly when every input word is 0
+{ uint64_t x7 = (x6 | x5); // accumulate the OR one word at a time
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8); // x9 = x2 | x4 | x6 | x5
+out[0] = x9; // only out[0] is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.h b/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for fenz.c in this directory; that definition stores a single word to out[0]
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c
new file mode 100644
index 000000000..d47a8efe4
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // computes 0 - (x5:x6:x4:x2) over four words, then adds constant words ANDed with the borrow-derived mask x19; writes out[0..3]
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2 (borrow-in 0), so x2 is the lowest word
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = borrow out of the top word
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz is declared outside this file (liblow.h); presumably yields 0 when x18 == 0 and all-ones otherwise -- TODO confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffffedL); // x20/x24/x28/x32 = x19 ANDed with the words of 2^255 - 19, lowest word first
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant back, lowest word first
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x7fffffffffffffffL);
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top word is dropped
+out[0] = x34; // highest word goes to out[0], lowest (x22) to out[3]
+out[1] = x30;
+out[2] = x26;
+out[3] = x22;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.h b/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for feopp.c in this directory; that definition stores out[0..3], so out needs room for uint64_t[4]
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c b/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c
new file mode 100644
index 000000000..0f8d12797
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // computes (x8:x9:x7:x5) - (x14:x15:x13:x11) over four words, then adds constant words ANDed with the borrow-derived mask x28; writes out[0..3]
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // lowest words first (borrow-in 0)
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = borrow out of the top words
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz is declared outside this file (liblow.h); presumably yields 0 when x27 == 0 and all-ones otherwise -- TODO confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffffedL); // x29/x33/x37/x41 = x28 ANDed with the words of 2^255 - 19, lowest word first
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant back, lowest word first
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x7fffffffffffffffL);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top word is dropped
+out[0] = x43; // highest word goes to out[0], lowest (x31) to out[3]
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.h b/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m2e4m2e1m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype for fesub.c in this directory; that definition stores out[0..3], so out needs room for uint64_t[4]
diff --git a/src/Specific/montgomery64_2e255m765/feadd.c b/src/Specific/montgomery64_2e255m765/feadd.c
new file mode 100644
index 000000000..304403933
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // computes (x8:x9:x7:x5) + (x14:x15:x13:x11) over four words, then a trial subtraction of 2^255 - 765 and a per-word select; writes out[0..3]
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // lowest words first (carry-in 0)
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the top words
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffffffffd03L, &x29); // trial subtraction; the four constants are the words of 2^255 - 765, lowest first
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0x7fffffffffffffffL, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // folds the carry word x27 into the borrow chain; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is declared outside this file (liblow.h); presumably picks the subtracted word when x42 == 0 and the plain sum otherwise -- TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // highest word goes to out[0], lowest (x46) to out[3]
+out[1] = x44;
+out[2] = x45;
+out[3] = x46;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m765/feadd.h b/src/Specific/montgomery64_2e255m765/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype for feadd.c in this directory; that definition stores out[0..3], so out needs room for uint64_t[4]
diff --git a/src/Specific/montgomery64_2e255m765/femul.c b/src/Specific/montgomery64_2e255m765/femul.c
new file mode 100644
index 000000000..232dec347
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // word-serial Montgomery-style multiply of (x8:x9:x7:x5) by (x14:x15:x13:x11) using the constant 2^255 - 765; four rounds, then a trial subtraction; writes out[0..3]
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of the second operand; _mulx_u64 returns the low half and stores the high half
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // add each high half to the next low half; x17 stays the lowest product word
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xaa54ffaa54ffaa55L, &_); // x41 = low 64 bits of x17 * constant (high half discarded); 765 * 0xaa54ffaa54ffaa55 == 1 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffffffffd03L, &x45); // x41 times the words of 2^255 - 765, lowest first
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0x7fffffffffffffffL, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // sum word is discarded: x44 == -x17 mod 2^64, so only the carry x69 matters
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each word of the second operand
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add the round-2 product to the running value x71,x74,x77,x80,x81
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xaa54ffaa54ffaa55L, &_); // same reduction step as in round 1, now driven by x107
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffffffffd03L, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0x7fffffffffffffffL, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // top carries of the two addition chains combined into one word
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each word of the second operand
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xaa54ffaa54ffaa55L, &_);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffffffffd03L, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0x7fffffffffffffffL, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times each word of the second operand
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xaa54ffaa54ffaa55L, &_);
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffffffffd03L, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0x7fffffffffffffffL, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284);
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffffffffd03L, &x329); // trial subtraction of 2^255 - 765 from x316,x319,x322,x325 (lowest word first)
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0x7fffffffffffffffL, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // folds the carry word x327 into the borrow chain; only the final borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz is declared outside this file (liblow.h); presumably picks the subtracted word when x342 == 0 and the unsubtracted word otherwise -- TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // highest word goes to out[0], lowest (x346) to out[3]
+out[1] = x344;
+out[2] = x345;
+out[3] = x346;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m765/femul.h b/src/Specific/montgomery64_2e255m765/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype for femul.c in this directory; that definition stores out[0..3], so out needs room for uint64_t[4]
diff --git a/src/Specific/montgomery64_2e255m765/fenz.c b/src/Specific/montgomery64_2e255m765/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the four input words into out[0]; the stored word is 0 exactly when every input word is 0
+{ uint64_t x7 = (x6 | x5); // accumulate the OR one word at a time
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8); // x9 = x2 | x4 | x6 | x5
+out[0] = x9; // only out[0] is written
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e255m765/fenz.h b/src/Specific/montgomery64_2e255m765/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for fenz.c in this directory; that definition stores a single word to out[0]
diff --git a/src/Specific/montgomery64_2e255m765/feopp.c b/src/Specific/montgomery64_2e255m765/feopp.c
new file mode 100644
index 000000000..af36ffb7b
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // computes 0 - (x5:x6:x4:x2) over four words, then adds constant words ANDed with the borrow-derived mask x19; writes out[0..3]
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2 (borrow-in 0), so x2 is the lowest word
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = borrow out of the top word
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz is declared outside this file (liblow.h); presumably yields 0 when x18 == 0 and all-ones otherwise -- TODO confirm
+{ uint64_t x20 = (x19 & 0xfffffffffffffd03L); // x20/x24/x28/x32 = x19 ANDed with the words of 2^255 - 765, lowest word first
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // add the masked constant back, lowest word first
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0x7fffffffffffffffL);
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // carry out of the top word is dropped
+out[0] = x34; // highest word goes to out[0], lowest (x22) to out[3]
+out[1] = x30;
+out[2] = x26;
+out[3] = x22;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m765/feopp.h b/src/Specific/montgomery64_2e255m765/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for feopp.c in this directory; that definition stores out[0..3], so out needs room for uint64_t[4]
diff --git a/src/Specific/montgomery64_2e255m765/fesub.c b/src/Specific/montgomery64_2e255m765/fesub.c
new file mode 100644
index 000000000..90f0ffa99
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // computes (x8:x9:x7:x5) - (x14:x15:x13:x11) over four words, then adds constant words ANDed with the borrow-derived mask x28; writes out[0..3]
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // lowest words first (borrow-in 0)
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = borrow out of the top words
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz is declared outside this file (liblow.h); presumably yields 0 when x27 == 0 and all-ones otherwise -- TODO confirm
+{ uint64_t x29 = (x28 & 0xfffffffffffffd03L); // x29/x33/x37/x41 = x28 ANDed with the words of 2^255 - 765, lowest word first
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // add the masked constant back, lowest word first
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0x7fffffffffffffffL);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // carry out of the top word is dropped
+out[0] = x43; // highest word goes to out[0], lowest (x31) to out[3]
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e255m765/fesub.h b/src/Specific/montgomery64_2e255m765/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e255m765/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype for fesub.c in this directory; that definition stores out[0..3], so out needs room for uint64_t[4]
diff --git a/src/Specific/montgomery64_2e256m189/feadd.c b/src/Specific/montgomery64_2e256m189/feadd.c
new file mode 100644
index 000000000..d35e30e75
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // computes (x8:x9:x7:x5) + (x14:x15:x13:x11) over four words, then a trial subtraction of 2^256 - 189 and a per-word select; writes out[0..3]
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // lowest words first (carry-in 0)
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the top words
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffff43L, &x29); // trial subtraction; the four constants are the words of 2^256 - 189, lowest first
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffffffffffffL, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // folds the carry word x27 into the borrow chain; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz is declared outside this file (liblow.h); presumably picks the subtracted word when x42 == 0 and the plain sum otherwise -- TODO confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // highest word goes to out[0], lowest (x46) to out[3]
+out[1] = x44;
+out[2] = x45;
+out[3] = x46;
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m189/feadd.h b/src/Specific/montgomery64_2e256m189/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // prototype for feadd.c in this directory; that definition stores out[0..3], so out needs room for uint64_t[4]
diff --git a/src/Specific/montgomery64_2e256m189/femul.c b/src/Specific/montgomery64_2e256m189/femul.c
new file mode 100644
index 000000000..214f6b0ef
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // word-serial Montgomery-style multiply of (x8:x9:x7:x5) by (x14:x15:x13:x11) using the constant 2^256 - 189; four rounds, then a trial subtraction; writes out[0..3]
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times each word of the second operand; _mulx_u64 returns the low half and stores the high half
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // add each high half to the next low half; x17 stays the lowest product word
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38);
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xa53fa94fea53fa95L, &_); // x41 = low 64 bits of x17 * constant (high half discarded); 189 * 0xa53fa94fea53fa95 == 1 mod 2^64
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xffffffffffffff43L, &x45); // x41 times the words of 2^256 - 189, lowest first
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xffffffffffffffffL, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // sum word is discarded: x44 == -x17 mod 2^64, so only the carry x69 matters
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times each word of the second operand
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // add the round-2 product to the running value x71,x74,x77,x80,x81
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xa53fa94fea53fa95L, &_); // same reduction step as in round 1, now driven by x107
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xffffffffffffff43L, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xffffffffffffffffL, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // top carries of the two addition chains combined into one word
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times each word of the second operand
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xa53fa94fea53fa95L, &_);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xffffffffffffff43L, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xffffffffffffffffL, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times each word of the second operand
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xa53fa94fea53fa95L, &_);
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xffffffffffffff43L, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xffffffffffffffffL, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284);
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xffffffffffffff43L, &x329); // trial subtraction of 2^256 - 189 from x316,x319,x322,x325 (lowest word first)
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xffffffffffffffffL, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // folds the carry word x327 into the borrow chain; only the final borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz is declared outside this file (liblow.h); presumably picks the subtracted word when x342 == 0 and the unsubtracted word otherwise -- TODO confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // highest word goes to out[0], lowest (x346) to out[3]
+out[1] = x344;
+out[2] = x345;
+out[3] = x346;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m189/femul.h b/src/Specific/montgomery64_2e256m189/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the definition in femul.c writes four words, out[0]..out[3]
diff --git a/src/Specific/montgomery64_2e256m189/fenz.c b/src/Specific/montgomery64_2e256m189/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // nonzero test: ORs the four input words together and stores the result in out[0]
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8); // x9 == 0 exactly when x2, x4, x6 and x5 are all zero
+out[0] = x9; // only out[0] is written; the body has no branches
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e256m189/fenz.h b/src/Specific/montgomery64_2e256m189/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only; the definition in fenz.c writes a single word, out[0]
diff --git a/src/Specific/montgomery64_2e256m189/feopp.c b/src/Specific/montgomery64_2e256m189/feopp.c
new file mode 100644
index 000000000..270aa6700
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // negation: computes 0 - (x5:x6:x4:x2) limb-wise, then adds the masked limbs of 2^256 - 189
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2, so x2 is the least significant limb
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = borrow out of the most significant limb
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz comes from liblow.h (not shown); presumably all-ones when x18 != 0, else 0 -- confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffff43L); // low limb of 2^256 - 189, kept only when the mask x19 is set
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // masked add, carried from the low limb upward
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the final carry-out is discarded
+out[0] = x34; // most significant result limb goes to out[0]
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // least significant result limb goes to out[3]
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m189/feopp.h b/src/Specific/montgomery64_2e256m189/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only; the definition in feopp.c writes four words, out[0]..out[3]
diff --git a/src/Specific/montgomery64_2e256m189/fesub.c b/src/Specific/montgomery64_2e256m189/fesub.c
new file mode 100644
index 000000000..251a20c47
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // subtraction: (x8:x9:x7:x5) - (x14:x15:x13:x11), then adds the masked limbs of 2^256 - 189
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts with x5 - x11, the least significant limbs
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = borrow out of the most significant limb
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz comes from liblow.h (not shown); presumably all-ones when x27 != 0, else 0 -- confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffff43L); // low limb of 2^256 - 189, kept only when the mask x28 is set
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // masked add, carried from the low limb upward
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the final carry-out is discarded
+out[0] = x43; // most significant result limb goes to out[0]
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // least significant result limb goes to out[3]
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m189/fesub.h b/src/Specific/montgomery64_2e256m189/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m189/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the definition in fesub.c writes four words, out[0]..out[3]
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c
new file mode 100644
index 000000000..629cca0a0
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // addition: (x8:x9:x7:x5) + (x14:x15:x13:x11), followed by a trial subtraction of 2^256 - 2^224 + 2^192 + 2^96 - 1
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts with x5 + x11, the least significant limbs
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the most significant limb
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29); // trial subtraction of the constant limbs, low to high
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffff, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0x0, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffff00000001L, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // carry x27 acts as a fifth limb; only the resulting borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz comes from liblow.h (not shown); presumably returns the second argument when x42 == 0, else the third -- confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // most significant result limb goes to out[0]
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least significant result limb goes to out[3]
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.h b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the definition in feadd.c writes four words, out[0]..out[3]
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c
new file mode 100644
index 000000000..7e33d1870
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.c
@@ -0,0 +1,132 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // multiply (Montgomery form, per the montgomery64_* directory name): four rounds, each a 1x4 limb product plus one reduction step, then a trial subtraction
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times (x11, x13, x15, x14); _mulx_u64 returns the low word and stores the high word through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // fold high and low halves into the limbs x17, x29, x32, x35, x38
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // carry x36 is passed as an addend; this call's own carry-out is discarded
+{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42); // reduction: x17 times the limbs 0xffffffffffffffff, 0xffffffff, (zero limb skipped), 0xffffffff00000001
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffff, &x45);
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffff00000001L, &x48);
+{ uint64_t x50; uint8_t x51 = _addcarryx_u64(0x0, x42, x44, &x50);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x51, x45, 0x0, &x53);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, 0x0, x47, &x56);
+{ uint64_t x59; uint8_t _ = _addcarryx_u64(0x0, x57, x48, &x59);
+{ uint64_t _; uint8_t x63 = _addcarryx_u64(0x0, x17, x41, &_); // low word of the sum is discarded; only its carry x63 propagates
+{ uint64_t x65; uint8_t x66 = _addcarryx_u64(x63, x29, x50, &x65);
+{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x32, x53, &x68);
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x35, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x38, x59, &x74);
+{ uint64_t x78; uint64_t x77 = _mulx_u64(x7, x11, &x78); // round 2: x7 times (x11, x13, x15, x14)
+{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x13, &x81);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x15, &x84);
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x14, &x87);
+{ uint64_t x89; uint8_t x90 = _addcarryx_u64(0x0, x78, x80, &x89);
+{ uint64_t x92; uint8_t x93 = _addcarryx_u64(x90, x81, x83, &x92);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+{ uint64_t x98; uint8_t _ = _addcarryx_u64(0x0, x96, x87, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(0x0, x65, x77, &x101); // accumulate this round's product onto the running limbs
+{ uint64_t x104; uint8_t x105 = _addcarryx_u64(x102, x68, x89, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x75, x98, &x113);
+{ uint64_t x117; uint64_t x116 = _mulx_u64(x101, 0xffffffffffffffffL, &x117); // reduction step for round 2, driven by the low limb x101
+{ uint64_t x120; uint64_t x119 = _mulx_u64(x101, 0xffffffff, &x120);
+{ uint64_t x123; uint64_t x122 = _mulx_u64(x101, 0xffffffff00000001L, &x123);
+{ uint64_t x125; uint8_t x126 = _addcarryx_u64(0x0, x117, x119, &x125);
+{ uint64_t x128; uint8_t x129 = _addcarryx_u64(x126, x120, 0x0, &x128);
+{ uint64_t x131; uint8_t x132 = _addcarryx_u64(x129, 0x0, x122, &x131);
+{ uint64_t x134; uint8_t _ = _addcarryx_u64(0x0, x132, x123, &x134);
+{ uint64_t _; uint8_t x138 = _addcarryx_u64(0x0, x101, x116, &_);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x104, x125, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x107, x128, &x143);
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x110, x131, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x113, x134, &x149);
+{ uint8_t x151 = (x150 + x114); // sum of this round's two top carries, used as the top limb in the next round
+{ uint64_t x154; uint64_t x153 = _mulx_u64(x9, x11, &x154); // round 3: x9 times (x11, x13, x15, x14)
+{ uint64_t x157; uint64_t x156 = _mulx_u64(x9, x13, &x157);
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x15, &x160);
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x14, &x163);
+{ uint64_t x165; uint8_t x166 = _addcarryx_u64(0x0, x154, x156, &x165);
+{ uint64_t x168; uint8_t x169 = _addcarryx_u64(x166, x157, x159, &x168);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x160, x162, &x171);
+{ uint64_t x174; uint8_t _ = _addcarryx_u64(0x0, x172, x163, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x140, x153, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x143, x165, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x146, x168, &x183);
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x151, x174, &x189);
+{ uint64_t x193; uint64_t x192 = _mulx_u64(x177, 0xffffffffffffffffL, &x193); // reduction step for round 3, driven by the low limb x177
+{ uint64_t x196; uint64_t x195 = _mulx_u64(x177, 0xffffffff, &x196);
+{ uint64_t x199; uint64_t x198 = _mulx_u64(x177, 0xffffffff00000001L, &x199);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(0x0, x193, x195, &x201);
+{ uint64_t x204; uint8_t x205 = _addcarryx_u64(x202, x196, 0x0, &x204);
+{ uint64_t x207; uint8_t x208 = _addcarryx_u64(x205, 0x0, x198, &x207);
+{ uint64_t x210; uint8_t _ = _addcarryx_u64(0x0, x208, x199, &x210);
+{ uint64_t _; uint8_t x214 = _addcarryx_u64(0x0, x177, x192, &_);
+{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x180, x201, &x216);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(x217, x183, x204, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x186, x207, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x189, x210, &x225);
+{ uint8_t x227 = (x226 + x190);
+{ uint64_t x230; uint64_t x229 = _mulx_u64(x8, x11, &x230); // round 4: x8 times (x11, x13, x15, x14)
+{ uint64_t x233; uint64_t x232 = _mulx_u64(x8, x13, &x233);
+{ uint64_t x236; uint64_t x235 = _mulx_u64(x8, x15, &x236);
+{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x14, &x239);
+{ uint64_t x241; uint8_t x242 = _addcarryx_u64(0x0, x230, x232, &x241);
+{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x233, x235, &x244);
+{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x236, x238, &x247);
+{ uint64_t x250; uint8_t _ = _addcarryx_u64(0x0, x248, x239, &x250);
+{ uint64_t x253; uint8_t x254 = _addcarryx_u64(0x0, x216, x229, &x253);
+{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x219, x241, &x256);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(x257, x222, x244, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x225, x247, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x227, x250, &x265);
+{ uint64_t x269; uint64_t x268 = _mulx_u64(x253, 0xffffffffffffffffL, &x269); // reduction step for round 4, driven by the low limb x253
+{ uint64_t x272; uint64_t x271 = _mulx_u64(x253, 0xffffffff, &x272);
+{ uint64_t x275; uint64_t x274 = _mulx_u64(x253, 0xffffffff00000001L, &x275);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(0x0, x269, x271, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x272, 0x0, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, 0x0, x274, &x283);
+{ uint64_t x286; uint8_t _ = _addcarryx_u64(0x0, x284, x275, &x286);
+{ uint64_t _; uint8_t x290 = _addcarryx_u64(0x0, x253, x268, &_);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x256, x277, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x259, x280, &x295);
+{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x262, x283, &x298);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(x299, x265, x286, &x301);
+{ uint8_t x303 = (x302 + x266);
+{ uint64_t x305; uint8_t x306 = _subborrow_u64(0x0, x292, 0xffffffffffffffffL, &x305); // trial subtraction of the limbs of 2^256 - 2^224 + 2^192 + 2^96 - 1, low to high
+{ uint64_t x308; uint8_t x309 = _subborrow_u64(x306, x295, 0xffffffff, &x308);
+{ uint64_t x311; uint8_t x312 = _subborrow_u64(x309, x298, 0x0, &x311);
+{ uint64_t x314; uint8_t x315 = _subborrow_u64(x312, x301, 0xffffffff00000001L, &x314);
+{ uint64_t _; uint8_t x318 = _subborrow_u64(x315, x303, 0x0, &_); // x303 acts as a fifth limb; only the resulting borrow x318 is kept
+{ uint64_t x319 = cmovznz(x318, x314, x301); // cmovznz comes from liblow.h (not shown); presumably returns the second argument when x318 == 0, else the third -- confirm
+{ uint64_t x320 = cmovznz(x318, x311, x298);
+{ uint64_t x321 = cmovznz(x318, x308, x295);
+{ uint64_t x322 = cmovznz(x318, x305, x292);
+out[0] = x319; // most significant result limb goes to out[0]
+out[1] = x320;
+out[2] = x321;
+out[3] = x322; // least significant result limb goes to out[3]
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.h b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the definition in femul.c writes four words, out[0]..out[3]
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // nonzero test: ORs the four input words together and stores the result in out[0]
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8); // x9 == 0 exactly when x2, x4, x6 and x5 are all zero
+out[0] = x9; // only out[0] is written; the body has no branches
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.h b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only; the definition in fenz.c writes a single word, out[0]
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c
new file mode 100644
index 000000000..90f64739b
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // negation: computes 0 - (x5:x6:x4:x2) limb-wise, then adds the masked limbs of 2^256 - 2^224 + 2^192 + 2^96 - 1
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // borrow chain starts at x2, so x2 is the least significant limb
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = borrow out of the most significant limb
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // cmovznz comes from liblow.h (not shown); presumably all-ones when x18 != 0, else 0 -- confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffffffL); // masked low limb of the constant
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22); // masked add, carried from the low limb upward
+{ uint64_t x24 = (x19 & 0xffffffff);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(x27, x14, 0x0, &x29); // third constant limb is zero, so only the carry is added here
+{ uint64_t x31 = (x19 & 0xffffffff00000001L);
+{ uint64_t x33; uint8_t _ = _addcarryx_u64(x30, x17, x31, &x33); // the final carry-out is discarded
+out[0] = x33; // most significant result limb goes to out[0]
+out[1] = x29;
+out[2] = x26;
+out[3] = x22; // least significant result limb goes to out[3]
+}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.h b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only; the definition in feopp.c writes four words, out[0]..out[3]
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c
new file mode 100644
index 000000000..8f286af64
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.c
@@ -0,0 +1,37 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // subtraction: (x8:x9:x7:x5) - (x14:x15:x13:x11), then adds the masked limbs of 2^256 - 2^224 + 2^192 + 2^96 - 1
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // borrow chain starts with x5 - x11, the least significant limbs
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = borrow out of the most significant limb
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // cmovznz comes from liblow.h (not shown); presumably all-ones when x27 != 0, else 0 -- confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffffffL); // masked low limb of the constant
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31); // masked add, carried from the low limb upward
+{ uint64_t x33 = (x28 & 0xffffffff);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x23, 0x0, &x38); // third constant limb is zero, so only the carry is added here
+{ uint64_t x40 = (x28 & 0xffffffff00000001L);
+{ uint64_t x42; uint8_t _ = _addcarryx_u64(x39, x26, x40, &x42); // the final carry-out is discarded
+out[0] = x42; // most significant result limb goes to out[0]
+out[1] = x38;
+out[2] = x35;
+out[3] = x31; // least significant result limb goes to out[3]
+}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.h b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e224p2e192p2e96m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the definition in fesub.c writes four words, out[0]..out[3]
diff --git a/src/Specific/montgomery64_2e256m2e32m977/feadd.c b/src/Specific/montgomery64_2e256m2e32m977/feadd.c
new file mode 100644
index 000000000..c16909cf3
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // addition: (x8:x9:x7:x5) + (x14:x15:x13:x11), followed by a trial subtraction of 2^256 - 2^32 - 977
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // carry chain starts with x5 + x11, the least significant limbs
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the most significant limb
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xfffffffefffffc2fL, &x29); // trial subtraction of the constant limbs, low to high
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffffffffffffffffL, &x38);
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // carry x27 acts as a fifth limb; only the resulting borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // cmovznz comes from liblow.h (not shown); presumably returns the second argument when x42 == 0, else the third -- confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // most significant result limb goes to out[0]
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // least significant result limb goes to out[3]
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m2e32m977/feadd.h b/src/Specific/montgomery64_2e256m2e32m977/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // declaration only; the definition in feadd.c writes four words, out[0]..out[3]
diff --git a/src/Specific/montgomery64_2e256m2e32m977/femul.c b/src/Specific/montgomery64_2e256m2e32m977/femul.c
new file mode 100644
index 000000000..3905eddb0
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/femul.c
@@ -0,0 +1,140 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // multiply (Montgomery form, per the montgomery64_* directory name): four rounds, each a 1x4 limb product plus one reduction step, then a trial subtraction
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1: x5 times (x11, x13, x15, x14); _mulx_u64 returns the low word and stores the high word through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // fold high and low halves into the limbs x17, x29, x32, x35, x38
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // carry x36 is passed as an addend; this call's own carry-out is discarded
+{ uint64_t _; uint64_t x41 = _mulx_u64(x17, 0xd838091dd2253531L, &_); // x41 = low word of x17 * constant (high word dropped); the constant is presumably -p^-1 mod 2^64 -- confirm
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x41, 0xfffffffefffffc2fL, &x45); // reduction: x41 times the limbs of 2^256 - 2^32 - 977 (low limb first, then three all-ones limbs)
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x41, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x41, 0xffffffffffffffffL, &x51);
+{ uint64_t x54; uint64_t x53 = _mulx_u64(x41, 0xffffffffffffffffL, &x54);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(0x0, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t x63 = _addcarryx_u64(x60, x51, x53, &x62);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(0x0, x63, x54, &x65);
+{ uint64_t _; uint8_t x69 = _addcarryx_u64(0x0, x17, x44, &_); // low word of the sum is discarded; only its carry x69 propagates
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x29, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x32, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x35, x62, &x77);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u64(x78, x38, x65, &x80);
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x11, &x84); // round 2: x7 times (x11, x13, x15, x14)
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x13, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x15, &x90);
+{ uint64_t x93; uint64_t x92 = _mulx_u64(x7, x14, &x93);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(0x0, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t x102 = _addcarryx_u64(x99, x90, x92, &x101);
+{ uint64_t x104; uint8_t _ = _addcarryx_u64(0x0, x102, x93, &x104);
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(0x0, x71, x83, &x107); // accumulate this round's product onto the running limbs
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x80, x101, &x116);
+{ uint64_t x119; uint8_t x120 = _addcarryx_u64(x117, x81, x104, &x119);
+{ uint64_t _; uint64_t x122 = _mulx_u64(x107, 0xd838091dd2253531L, &_); // reduction factor for round 2, derived from the low limb x107
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x122, 0xfffffffefffffc2fL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x122, 0xffffffffffffffffL, &x129);
+{ uint64_t x132; uint64_t x131 = _mulx_u64(x122, 0xffffffffffffffffL, &x132);
+{ uint64_t x135; uint64_t x134 = _mulx_u64(x122, 0xffffffffffffffffL, &x135);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(0x0, x126, x128, &x137);
+{ uint64_t x140; uint8_t x141 = _addcarryx_u64(x138, x129, x131, &x140);
+{ uint64_t x143; uint8_t x144 = _addcarryx_u64(x141, x132, x134, &x143);
+{ uint64_t x146; uint8_t _ = _addcarryx_u64(0x0, x144, x135, &x146);
+{ uint64_t _; uint8_t x150 = _addcarryx_u64(0x0, x107, x125, &_);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x110, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x113, x140, &x155);
+{ uint64_t x158; uint8_t x159 = _addcarryx_u64(x156, x116, x143, &x158);
+{ uint64_t x161; uint8_t x162 = _addcarryx_u64(x159, x119, x146, &x161);
+{ uint8_t x163 = (x162 + x120); // sum of this round's two top carries, used as the top limb in the next round
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x11, &x166); // round 3: x9 times (x11, x13, x15, x14)
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x13, &x169);
+{ uint64_t x172; uint64_t x171 = _mulx_u64(x9, x15, &x172);
+{ uint64_t x175; uint64_t x174 = _mulx_u64(x9, x14, &x175);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(0x0, x166, x168, &x177);
+{ uint64_t x180; uint8_t x181 = _addcarryx_u64(x178, x169, x171, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(x181, x172, x174, &x183);
+{ uint64_t x186; uint8_t _ = _addcarryx_u64(0x0, x184, x175, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(0x0, x152, x165, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x158, x180, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x161, x183, &x198);
+{ uint64_t x201; uint8_t x202 = _addcarryx_u64(x199, x163, x186, &x201);
+{ uint64_t _; uint64_t x204 = _mulx_u64(x189, 0xd838091dd2253531L, &_); // reduction factor for round 3, derived from the low limb x189
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x204, 0xfffffffefffffc2fL, &x208);
+{ uint64_t x211; uint64_t x210 = _mulx_u64(x204, 0xffffffffffffffffL, &x211);
+{ uint64_t x214; uint64_t x213 = _mulx_u64(x204, 0xffffffffffffffffL, &x214);
+{ uint64_t x217; uint64_t x216 = _mulx_u64(x204, 0xffffffffffffffffL, &x217);
+{ uint64_t x219; uint8_t x220 = _addcarryx_u64(0x0, x208, x210, &x219);
+{ uint64_t x222; uint8_t x223 = _addcarryx_u64(x220, x211, x213, &x222);
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x214, x216, &x225);
+{ uint64_t x228; uint8_t _ = _addcarryx_u64(0x0, x226, x217, &x228);
+{ uint64_t _; uint8_t x232 = _addcarryx_u64(0x0, x189, x207, &_);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x192, x219, &x234);
+{ uint64_t x237; uint8_t x238 = _addcarryx_u64(x235, x195, x222, &x237);
+{ uint64_t x240; uint8_t x241 = _addcarryx_u64(x238, x198, x225, &x240);
+{ uint64_t x243; uint8_t x244 = _addcarryx_u64(x241, x201, x228, &x243);
+{ uint8_t x245 = (x244 + x202);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x11, &x248); // round 4: x8 times (x11, x13, x15, x14)
+{ uint64_t x251; uint64_t x250 = _mulx_u64(x8, x13, &x251);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x8, x15, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x8, x14, &x257);
+{ uint64_t x259; uint8_t x260 = _addcarryx_u64(0x0, x248, x250, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(x260, x251, x253, &x262);
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x254, x256, &x265);
+{ uint64_t x268; uint8_t _ = _addcarryx_u64(0x0, x266, x257, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(0x0, x234, x247, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x237, x259, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x240, x262, &x277);
+{ uint64_t x280; uint8_t x281 = _addcarryx_u64(x278, x243, x265, &x280);
+{ uint64_t x283; uint8_t x284 = _addcarryx_u64(x281, x245, x268, &x283);
+{ uint64_t _; uint64_t x286 = _mulx_u64(x271, 0xd838091dd2253531L, &_); // reduction factor for round 4, derived from the low limb x271
+{ uint64_t x290; uint64_t x289 = _mulx_u64(x286, 0xfffffffefffffc2fL, &x290);
+{ uint64_t x293; uint64_t x292 = _mulx_u64(x286, 0xffffffffffffffffL, &x293);
+{ uint64_t x296; uint64_t x295 = _mulx_u64(x286, 0xffffffffffffffffL, &x296);
+{ uint64_t x299; uint64_t x298 = _mulx_u64(x286, 0xffffffffffffffffL, &x299);
+{ uint64_t x301; uint8_t x302 = _addcarryx_u64(0x0, x290, x292, &x301);
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x293, x295, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x296, x298, &x307);
+{ uint64_t x310; uint8_t _ = _addcarryx_u64(0x0, x308, x299, &x310);
+{ uint64_t _; uint8_t x314 = _addcarryx_u64(0x0, x271, x289, &_);
+{ uint64_t x316; uint8_t x317 = _addcarryx_u64(x314, x274, x301, &x316);
+{ uint64_t x319; uint8_t x320 = _addcarryx_u64(x317, x277, x304, &x319);
+{ uint64_t x322; uint8_t x323 = _addcarryx_u64(x320, x280, x307, &x322);
+{ uint64_t x325; uint8_t x326 = _addcarryx_u64(x323, x283, x310, &x325);
+{ uint8_t x327 = (x326 + x284);
+{ uint64_t x329; uint8_t x330 = _subborrow_u64(0x0, x316, 0xfffffffefffffc2fL, &x329); // trial subtraction of the limbs of 2^256 - 2^32 - 977, low to high
+{ uint64_t x332; uint8_t x333 = _subborrow_u64(x330, x319, 0xffffffffffffffffL, &x332);
+{ uint64_t x335; uint8_t x336 = _subborrow_u64(x333, x322, 0xffffffffffffffffL, &x335);
+{ uint64_t x338; uint8_t x339 = _subborrow_u64(x336, x325, 0xffffffffffffffffL, &x338);
+{ uint64_t _; uint8_t x342 = _subborrow_u64(x339, x327, 0x0, &_); // x327 acts as a fifth limb; only the resulting borrow x342 is kept
+{ uint64_t x343 = cmovznz(x342, x338, x325); // cmovznz comes from liblow.h (not shown); presumably returns the second argument when x342 == 0, else the third -- confirm
+{ uint64_t x344 = cmovznz(x342, x335, x322);
+{ uint64_t x345 = cmovznz(x342, x332, x319);
+{ uint64_t x346 = cmovznz(x342, x329, x316);
+out[0] = x343; // most significant result limb goes to out[0]
+out[1] = x344;
+out[2] = x345;
+out[3] = x346; // least significant result limb goes to out[3]
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m2e32m977/femul.h b/src/Specific/montgomery64_2e256m2e32m977/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e256m2e32m977/fenz.c b/src/Specific/montgomery64_2e256m2e32m977/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores the bitwise OR of the four input words in out[0], so out[0] != 0 iff at least one input word is non-zero.
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8);
+out[0] = x9; // the only element written; the result is an OR of bits, not a normalized 0/1 flag
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e256m2e32m977/fenz.h b/src/Specific/montgomery64_2e256m2e32m977/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e256m2e32m977/feopp.c b/src/Specific/montgomery64_2e256m2e32m977/feopp.c
new file mode 100644
index 000000000..042716ff8
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x5,x6,x4,x2) as a 4-limb borrow chain, then adds the masked constants below; writes 4 words to out.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // the borrow chain starts at x2, so x2 is the least-significant limb
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = borrow out of the most-significant limb
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably yields 0 when x18 == 0 and all-ones otherwise - confirm
+{ uint64_t x20 = (x19 & 0xfffffffefffffc2fL); // 0xfffffffefffffc2f == 2^64 - 2^32 - 977, the low limb of 2^256 - 2^32 - 977 (cf. the directory name)
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL); // the remaining three limbs of the constant are all-ones
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the carry out of the top limb is deliberately discarded
+out[0] = x34; // out[0] holds the limb derived from x5 (top of the chain)
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // out[3] holds the limb derived from x2 (bottom of the chain)
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m2e32m977/feopp.h b/src/Specific/montgomery64_2e256m2e32m977/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e256m2e32m977/fesub.c b/src/Specific/montgomery64_2e256m2e32m977/fesub.c
new file mode 100644
index 000000000..d1a9a043e
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtraction: computes (x8,x9,x7,x5) - (x14,x15,x13,x11) as a 4-limb borrow chain, then adds the masked constants below; writes 4 words to out.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // the chain starts with x5 - x11, so these are the least-significant limbs
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = borrow out of the most-significant limb
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably yields 0 when x27 == 0 and all-ones otherwise - confirm
+{ uint64_t x29 = (x28 & 0xfffffffefffffc2fL); // 0xfffffffefffffc2f == 2^64 - 2^32 - 977, the low limb of 2^256 - 2^32 - 977 (cf. the directory name)
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL); // the remaining three limbs of the constant are all-ones
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the carry out of the top limb is deliberately discarded
+out[0] = x43; // out[0] holds the limb derived from x8/x14 (top of the chain)
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // out[3] holds the limb derived from x5/x11 (bottom of the chain)
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m2e32m977/fesub.h b/src/Specific/montgomery64_2e256m2e32m977/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m2e32m977/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/feadd.c b/src/Specific/montgomery64_2e256m88x2e240m1/feadd.c
new file mode 100644
index 000000000..59e978a69
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/feadd.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Addition: computes (x8,x9,x7,x5) + (x14,x15,x13,x11) as a 4-limb carry chain, trial-subtracts the constant below, and selects one of the two candidates per limb; writes 4 words to out.
+{ uint64_t x17; uint8_t x18 = _addcarryx_u64(0x0, x5, x11, &x17); // the chain starts with x5 + x11, so these are the least-significant limbs
+{ uint64_t x20; uint8_t x21 = _addcarryx_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _addcarryx_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x24, x8, x14, &x26); // x27 = carry out of the most-significant limb
+{ uint64_t x29; uint8_t x30 = _subborrow_u64(0x0, x17, 0xffffffffffffffffL, &x29); // trial subtraction of the 4-limb constant (low three limbs are all-ones)
+{ uint64_t x32; uint8_t x33 = _subborrow_u64(x30, x20, 0xffffffffffffffffL, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u64(x33, x23, 0xffffffffffffffffL, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u64(x36, x26, 0xffa7ffffffffffffL, &x38); // 0xffa7ffffffffffff is the top limb of 2^256 - 88*2^240 - 1 (cf. the directory name)
+{ uint64_t _; uint8_t x42 = _subborrow_u64(x39, x27, 0x0, &_); // extends the borrow through the carry word; only the final borrow x42 is kept
+{ uint64_t x43 = cmovznz(x42, x38, x26); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably returns the 2nd argument when x42 == 0 and the 3rd otherwise - confirm
+{ uint64_t x44 = cmovznz(x42, x35, x23);
+{ uint64_t x45 = cmovznz(x42, x32, x20);
+{ uint64_t x46 = cmovznz(x42, x29, x17);
+out[0] = x43; // out[0] holds the most-significant limb
+out[1] = x44;
+out[2] = x45;
+out[3] = x46; // out[3] holds the least-significant limb
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/feadd.h b/src/Specific/montgomery64_2e256m88x2e240m1/feadd.h
new file mode 100644
index 000000000..8c4dc2186
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/femul.c b/src/Specific/montgomery64_2e256m88x2e240m1/femul.c
new file mode 100644
index 000000000..2382c2c35
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/femul.c
@@ -0,0 +1,136 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplication: four rounds, one per limb x5, x7, x9, x8 of the first operand; each round adds limb*(x14,x15,x13,x11) to the accumulator, then adds (low accumulator word)*constant and drops the low word. Ends with a trial subtraction of the constant; writes 4 words to out.
+{ uint64_t x18; uint64_t x17 = _mulx_u64(x5, x11, &x18); // round 1 partial products: _mulx_u64 returns the low 64 bits and stores the high 64 bits through the pointer
+{ uint64_t x21; uint64_t x20 = _mulx_u64(x5, x13, &x21);
+{ uint64_t x24; uint64_t x23 = _mulx_u64(x5, x15, &x24);
+{ uint64_t x27; uint64_t x26 = _mulx_u64(x5, x14, &x27);
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x18, x20, &x29); // combine each high half with the next low half: product words are x17, x29, x32, x35, x38
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x21, x23, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x24, x26, &x35);
+{ uint64_t x38; uint8_t _ = _addcarryx_u64(0x0, x36, x27, &x38); // adds the carry bit x36 into the top high half; the resulting carry is discarded
+{ uint64_t x42; uint64_t x41 = _mulx_u64(x17, 0xffffffffffffffffL, &x42); // reduction step: multiply the low word x17 by each limb of the constant (all-ones, all-ones, all-ones, 0xffa7ffffffffffff)
+{ uint64_t x45; uint64_t x44 = _mulx_u64(x17, 0xffffffffffffffffL, &x45);
+{ uint64_t x48; uint64_t x47 = _mulx_u64(x17, 0xffffffffffffffffL, &x48);
+{ uint64_t x51; uint64_t x50 = _mulx_u64(x17, 0xffa7ffffffffffffL, &x51); // 0xffa7ffffffffffff is the top limb of 2^256 - 88*2^240 - 1 (cf. the directory name)
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(0x0, x42, x44, &x53);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u64(x54, x45, x47, &x56);
+{ uint64_t x59; uint8_t x60 = _addcarryx_u64(x57, x48, x50, &x59);
+{ uint64_t x62; uint8_t _ = _addcarryx_u64(0x0, x60, x51, &x62);
+{ uint64_t _; uint8_t x66 = _addcarryx_u64(0x0, x17, x41, &_); // x41 == x17*(2^64-1) mod 2^64 == -x17 mod 2^64, so this low word sums to zero; only the carry x66 is kept
+{ uint64_t x68; uint8_t x69 = _addcarryx_u64(x66, x29, x53, &x68);
+{ uint64_t x71; uint8_t x72 = _addcarryx_u64(x69, x32, x56, &x71);
+{ uint64_t x74; uint8_t x75 = _addcarryx_u64(x72, x35, x59, &x74);
+{ uint64_t x77; uint8_t x78 = _addcarryx_u64(x75, x38, x62, &x77); // accumulator after round 1: words x68, x71, x74, x77 plus carry x78
+{ uint64_t x81; uint64_t x80 = _mulx_u64(x7, x11, &x81); // round 2 partial products: x7 times each limb of the second operand
+{ uint64_t x84; uint64_t x83 = _mulx_u64(x7, x13, &x84);
+{ uint64_t x87; uint64_t x86 = _mulx_u64(x7, x15, &x87);
+{ uint64_t x90; uint64_t x89 = _mulx_u64(x7, x14, &x90);
+{ uint64_t x92; uint8_t x93 = _addcarryx_u64(0x0, x81, x83, &x92);
+{ uint64_t x95; uint8_t x96 = _addcarryx_u64(x93, x84, x86, &x95);
+{ uint64_t x98; uint8_t x99 = _addcarryx_u64(x96, x87, x89, &x98);
+{ uint64_t x101; uint8_t _ = _addcarryx_u64(0x0, x99, x90, &x101);
+{ uint64_t x104; uint8_t x105 = _addcarryx_u64(0x0, x68, x80, &x104); // add the round 2 product into the accumulator
+{ uint64_t x107; uint8_t x108 = _addcarryx_u64(x105, x71, x92, &x107);
+{ uint64_t x110; uint8_t x111 = _addcarryx_u64(x108, x74, x95, &x110);
+{ uint64_t x113; uint8_t x114 = _addcarryx_u64(x111, x77, x98, &x113);
+{ uint64_t x116; uint8_t x117 = _addcarryx_u64(x114, x78, x101, &x116);
+{ uint64_t x120; uint64_t x119 = _mulx_u64(x104, 0xffffffffffffffffL, &x120); // reduction step for round 2, driven by the low word x104
+{ uint64_t x123; uint64_t x122 = _mulx_u64(x104, 0xffffffffffffffffL, &x123);
+{ uint64_t x126; uint64_t x125 = _mulx_u64(x104, 0xffffffffffffffffL, &x126);
+{ uint64_t x129; uint64_t x128 = _mulx_u64(x104, 0xffa7ffffffffffffL, &x129);
+{ uint64_t x131; uint8_t x132 = _addcarryx_u64(0x0, x120, x122, &x131);
+{ uint64_t x134; uint8_t x135 = _addcarryx_u64(x132, x123, x125, &x134);
+{ uint64_t x137; uint8_t x138 = _addcarryx_u64(x135, x126, x128, &x137);
+{ uint64_t x140; uint8_t _ = _addcarryx_u64(0x0, x138, x129, &x140);
+{ uint64_t _; uint8_t x144 = _addcarryx_u64(0x0, x104, x119, &_); // low word cancels as in round 1; only the carry x144 is kept
+{ uint64_t x146; uint8_t x147 = _addcarryx_u64(x144, x107, x131, &x146);
+{ uint64_t x149; uint8_t x150 = _addcarryx_u64(x147, x110, x134, &x149);
+{ uint64_t x152; uint8_t x153 = _addcarryx_u64(x150, x113, x137, &x152);
+{ uint64_t x155; uint8_t x156 = _addcarryx_u64(x153, x116, x140, &x155);
+{ uint8_t x157 = (x156 + x117); // merges the two carry bits into the accumulator's extra top word
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x9, x11, &x160); // round 3 partial products: x9 times each limb of the second operand
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x9, x13, &x163);
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x9, x15, &x166);
+{ uint64_t x169; uint64_t x168 = _mulx_u64(x9, x14, &x169);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(0x0, x160, x162, &x171);
+{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x163, x165, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x166, x168, &x177);
+{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x169, &x180);
+{ uint64_t x183; uint8_t x184 = _addcarryx_u64(0x0, x146, x159, &x183); // add the round 3 product into the accumulator
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x149, x171, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x152, x174, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x155, x177, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x157, x180, &x195);
+{ uint64_t x199; uint64_t x198 = _mulx_u64(x183, 0xffffffffffffffffL, &x199); // reduction step for round 3, driven by the low word x183
+{ uint64_t x202; uint64_t x201 = _mulx_u64(x183, 0xffffffffffffffffL, &x202);
+{ uint64_t x205; uint64_t x204 = _mulx_u64(x183, 0xffffffffffffffffL, &x205);
+{ uint64_t x208; uint64_t x207 = _mulx_u64(x183, 0xffa7ffffffffffffL, &x208);
+{ uint64_t x210; uint8_t x211 = _addcarryx_u64(0x0, x199, x201, &x210);
+{ uint64_t x213; uint8_t x214 = _addcarryx_u64(x211, x202, x204, &x213);
+{ uint64_t x216; uint8_t x217 = _addcarryx_u64(x214, x205, x207, &x216);
+{ uint64_t x219; uint8_t _ = _addcarryx_u64(0x0, x217, x208, &x219);
+{ uint64_t _; uint8_t x223 = _addcarryx_u64(0x0, x183, x198, &_); // low word cancels as in round 1; only the carry x223 is kept
+{ uint64_t x225; uint8_t x226 = _addcarryx_u64(x223, x186, x210, &x225);
+{ uint64_t x228; uint8_t x229 = _addcarryx_u64(x226, x189, x213, &x228);
+{ uint64_t x231; uint8_t x232 = _addcarryx_u64(x229, x192, x216, &x231);
+{ uint64_t x234; uint8_t x235 = _addcarryx_u64(x232, x195, x219, &x234);
+{ uint8_t x236 = (x235 + x196); // merges the two carry bits into the accumulator's extra top word
+{ uint64_t x239; uint64_t x238 = _mulx_u64(x8, x11, &x239); // round 4 partial products: x8 times each limb of the second operand
+{ uint64_t x242; uint64_t x241 = _mulx_u64(x8, x13, &x242);
+{ uint64_t x245; uint64_t x244 = _mulx_u64(x8, x15, &x245);
+{ uint64_t x248; uint64_t x247 = _mulx_u64(x8, x14, &x248);
+{ uint64_t x250; uint8_t x251 = _addcarryx_u64(0x0, x239, x241, &x250);
+{ uint64_t x253; uint8_t x254 = _addcarryx_u64(x251, x242, x244, &x253);
+{ uint64_t x256; uint8_t x257 = _addcarryx_u64(x254, x245, x247, &x256);
+{ uint64_t x259; uint8_t _ = _addcarryx_u64(0x0, x257, x248, &x259);
+{ uint64_t x262; uint8_t x263 = _addcarryx_u64(0x0, x225, x238, &x262); // add the round 4 product into the accumulator
+{ uint64_t x265; uint8_t x266 = _addcarryx_u64(x263, x228, x250, &x265);
+{ uint64_t x268; uint8_t x269 = _addcarryx_u64(x266, x231, x253, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x234, x256, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x236, x259, &x274);
+{ uint64_t x278; uint64_t x277 = _mulx_u64(x262, 0xffffffffffffffffL, &x278); // reduction step for round 4, driven by the low word x262
+{ uint64_t x281; uint64_t x280 = _mulx_u64(x262, 0xffffffffffffffffL, &x281);
+{ uint64_t x284; uint64_t x283 = _mulx_u64(x262, 0xffffffffffffffffL, &x284);
+{ uint64_t x287; uint64_t x286 = _mulx_u64(x262, 0xffa7ffffffffffffL, &x287);
+{ uint64_t x289; uint8_t x290 = _addcarryx_u64(0x0, x278, x280, &x289);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x281, x283, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x284, x286, &x295);
+{ uint64_t x298; uint8_t _ = _addcarryx_u64(0x0, x296, x287, &x298);
+{ uint64_t _; uint8_t x302 = _addcarryx_u64(0x0, x262, x277, &_); // low word cancels as in round 1; only the carry x302 is kept
+{ uint64_t x304; uint8_t x305 = _addcarryx_u64(x302, x265, x289, &x304);
+{ uint64_t x307; uint8_t x308 = _addcarryx_u64(x305, x268, x292, &x307);
+{ uint64_t x310; uint8_t x311 = _addcarryx_u64(x308, x271, x295, &x310);
+{ uint64_t x313; uint8_t x314 = _addcarryx_u64(x311, x274, x298, &x313);
+{ uint8_t x315 = (x314 + x275); // final extra top word of the accumulator
+{ uint64_t x317; uint8_t x318 = _subborrow_u64(0x0, x304, 0xffffffffffffffffL, &x317); // trial subtraction of the 4-limb constant from (x315, x313, x310, x307, x304)
+{ uint64_t x320; uint8_t x321 = _subborrow_u64(x318, x307, 0xffffffffffffffffL, &x320);
+{ uint64_t x323; uint8_t x324 = _subborrow_u64(x321, x310, 0xffffffffffffffffL, &x323);
+{ uint64_t x326; uint8_t x327 = _subborrow_u64(x324, x313, 0xffa7ffffffffffffL, &x326);
+{ uint64_t _; uint8_t x330 = _subborrow_u64(x327, x315, 0x0, &_); // only the final borrow x330 is kept
+{ uint64_t x331 = cmovznz(x330, x326, x313); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably returns the 2nd argument when x330 == 0 and the 3rd otherwise - confirm
+{ uint64_t x332 = cmovznz(x330, x323, x310);
+{ uint64_t x333 = cmovznz(x330, x320, x307);
+{ uint64_t x334 = cmovznz(x330, x317, x304);
+out[0] = x331; // out[0] holds the most-significant limb
+out[1] = x332;
+out[2] = x333;
+out[3] = x334; // out[3] holds the least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/femul.h b/src/Specific/montgomery64_2e256m88x2e240m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/fenz.c b/src/Specific/montgomery64_2e256m88x2e240m1/fenz.c
new file mode 100644
index 000000000..51bde0513
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/fenz.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores the bitwise OR of the four input words in out[0], so out[0] != 0 iff at least one input word is non-zero.
+{ uint64_t x7 = (x6 | x5);
+{ uint64_t x8 = (x4 | x7);
+{ uint64_t x9 = (x2 | x8);
+out[0] = x9; // the only element written; the result is an OR of bits, not a normalized 0/1 flag
+}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/fenz.h b/src/Specific/montgomery64_2e256m88x2e240m1/fenz.h
new file mode 100644
index 000000000..de4c849f7
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/feopp.c b/src/Specific/montgomery64_2e256m88x2e240m1/feopp.c
new file mode 100644
index 000000000..334a368a5
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/feopp.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x5,x6,x4,x2) as a 4-limb borrow chain, then adds the masked constants below; writes 4 words to out.
+{ uint64_t x8; uint8_t x9 = _subborrow_u64(0x0, 0x0, x2, &x8); // the borrow chain starts at x2, so x2 is the least-significant limb
+{ uint64_t x11; uint8_t x12 = _subborrow_u64(x9, 0x0, x4, &x11);
+{ uint64_t x14; uint8_t x15 = _subborrow_u64(x12, 0x0, x6, &x14);
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(x15, 0x0, x5, &x17); // x18 = borrow out of the most-significant limb
+{ uint64_t x19 = (uint64_t)cmovznz(x18, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably yields 0 when x18 == 0 and all-ones otherwise - confirm
+{ uint64_t x20 = (x19 & 0xffffffffffffffffL); // the low three limbs of the constant are all-ones
+{ uint64_t x22; uint8_t x23 = _addcarryx_u64(0x0, x8, x20, &x22);
+{ uint64_t x24 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x26; uint8_t x27 = _addcarryx_u64(x23, x11, x24, &x26);
+{ uint64_t x28 = (x19 & 0xffffffffffffffffL);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x27, x14, x28, &x30);
+{ uint64_t x32 = (x19 & 0xffa7ffffffffffffL); // 0xffa7ffffffffffff is the top limb of 2^256 - 88*2^240 - 1 (cf. the directory name)
+{ uint64_t x34; uint8_t _ = _addcarryx_u64(x31, x17, x32, &x34); // the carry out of the top limb is deliberately discarded
+out[0] = x34; // out[0] holds the limb derived from x5 (top of the chain)
+out[1] = x30;
+out[2] = x26;
+out[3] = x22; // out[3] holds the limb derived from x2 (bottom of the chain)
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/feopp.h b/src/Specific/montgomery64_2e256m88x2e240m1/feopp.h
new file mode 100644
index 000000000..cda15c23a
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/fesub.c b/src/Specific/montgomery64_2e256m88x2e240m1/fesub.c
new file mode 100644
index 000000000..79ce81a88
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/fesub.c
@@ -0,0 +1,38 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Subtraction: computes (x8,x9,x7,x5) - (x14,x15,x13,x11) as a 4-limb borrow chain, then adds the masked constants below; writes 4 words to out.
+{ uint64_t x17; uint8_t x18 = _subborrow_u64(0x0, x5, x11, &x17); // the chain starts with x5 - x11, so these are the least-significant limbs
+{ uint64_t x20; uint8_t x21 = _subborrow_u64(x18, x7, x13, &x20);
+{ uint64_t x23; uint8_t x24 = _subborrow_u64(x21, x9, x15, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u64(x24, x8, x14, &x26); // x27 = borrow out of the most-significant limb
+{ uint64_t x28 = (uint64_t)cmovznz(x27, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably yields 0 when x27 == 0 and all-ones otherwise - confirm
+{ uint64_t x29 = (x28 & 0xffffffffffffffffL); // the low three limbs of the constant are all-ones
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(0x0, x17, x29, &x31);
+{ uint64_t x33 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x20, x33, &x35);
+{ uint64_t x37 = (x28 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x23, x37, &x39);
+{ uint64_t x41 = (x28 & 0xffa7ffffffffffffL); // 0xffa7ffffffffffff is the top limb of 2^256 - 88*2^240 - 1 (cf. the directory name)
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x26, x41, &x43); // the carry out of the top limb is deliberately discarded
+out[0] = x43; // out[0] holds the limb derived from x8/x14 (top of the chain)
+out[1] = x39;
+out[2] = x35;
+out[3] = x31; // out[3] holds the limb derived from x5/x11 (bottom of the chain)
+}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/montgomery64_2e256m88x2e240m1/fesub.h b/src/Specific/montgomery64_2e256m88x2e240m1/fesub.h
new file mode 100644
index 000000000..78c40b980
--- /dev/null
+++ b/src/Specific/montgomery64_2e256m88x2e240m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/montgomery64_2e266m3/feadd.c b/src/Specific/montgomery64_2e266m3/feadd.c
new file mode 100644
index 000000000..1d1635437
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Addition: computes (x10,x11,x9,x7,x5) + (x18,x19,x17,x15,x13) as a 5-limb carry chain, trial-subtracts the constant below, and selects one of the two candidates per limb; writes 5 words to out.
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21); // the chain starts with x5 + x13, so these are the least-significant limbs
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
+{ uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33); // x34 = carry out of the most-significant limb
+{ uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xfffffffffffffffdL, &x36); // trial subtraction of the 5-limb constant; 0xfffffffffffffffd == 2^64 - 3 is the low limb of 2^266 - 3 (cf. the directory name)
+{ uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39);
+{ uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
+{ uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
+{ uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x3ff, &x48); // 0x3ff == 2^10 - 1 is the top limb of 2^266 - 3 (266 - 4*64 = 10 bits)
+{ uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_); // extends the borrow through the carry word; only the final borrow x52 is kept
+{ uint64_t x53 = cmovznz(x52, x48, x33); // NOTE(review): cmovznz comes from liblow.h (not shown); presumably returns the 2nd argument when x52 == 0 and the 3rd otherwise - confirm
+{ uint64_t x54 = cmovznz(x52, x45, x30);
+{ uint64_t x55 = cmovznz(x52, x42, x27);
+{ uint64_t x56 = cmovznz(x52, x39, x24);
+{ uint64_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // out[0] holds the most-significant limb
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57; // out[4] holds the least-significant limb
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery64_2e266m3/feadd.h b/src/Specific/montgomery64_2e266m3/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/montgomery64_2e266m3/femul.c b/src/Specific/montgomery64_2e266m3/femul.c
new file mode 100644
index 000000000..918a3a579
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Multiplies the five-limb operands (x5, x7, x9, x11, x10) and (x13, x15, x17, x19, x18), least-significant limb first, interleaving one reduction step by 2^266 - 3 per limb of the first operand (word-by-word Montgomery-style, as the montgomery64_2e266m3 directory name suggests), then does one conditional subtraction; writes out[0..4].
+{ uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22); // round 1: x5 times every limb of the second operand; _mulx_u64 returns the low word and stores the high word through the pointer
+{ uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
+{ uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
+{ uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
+{ uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36); // chain high and low halves together: the product's words are x21, x36, x39, x42, x45, x48 (low to high)
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
+{ uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48); // top word: last carry plus last high half; this add's carry-out is discarded
+{ uint64_t _; uint64_t x51 = _mulx_u64(x21, 0xaaaaaaaaaaaaaaabL, &_); // x51 = low 64 bits of x21 * 0xaaaaaaaaaaaaaaab (3 * that constant == 1 mod 2^64); the high half is discarded
+{ uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xfffffffffffffffdL, &x55); // x51 times each limb of 2^266 - 3 (limbs fd.., ff.., ff.., ff.., 0x3ff)
+{ uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
+{ uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
+{ uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
+{ uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x3ff, &x67);
+{ uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
+{ uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
+{ uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
+{ uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
+{ uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
+{ uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_); // x21 + x54 == 0 mod 2^64 by the choice of x51, so the low word is dropped and only its carry x85 is used
+{ uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
+{ uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
+{ uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
+{ uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
+{ uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99); // running value after round 1: words x87, x90, x93, x96, x99 plus carry x100
+{ uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103); // round 2: same steps with x7
+{ uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
+{ uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
+{ uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
+{ uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
+{ uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
+{ uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
+{ uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
+{ uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
+{ uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
+{ uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132); // add this round's product to the running value
+{ uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
+{ uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
+{ uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
+{ uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
+{ uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
+{ uint64_t _; uint64_t x150 = _mulx_u64(x132, 0xaaaaaaaaaaaaaaabL, &_); // reduction step for round 2, same pattern as x51 above
+{ uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xfffffffffffffffdL, &x154);
+{ uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x3ff, &x166);
+{ uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
+{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
+{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
+{ uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148); // merge the carry-outs of the two additions into the top word of the running value
+{ uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203); // round 3: same steps with x9
+{ uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
+{ uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
+{ uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
+{ uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
+{ uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
+{ uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
+{ uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
+{ uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
+{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
+{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
+{ uint64_t _; uint64_t x250 = _mulx_u64(x232, 0xaaaaaaaaaaaaaaabL, &_);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xfffffffffffffffdL, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
+{ uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
+{ uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
+{ uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x3ff, &x266);
+{ uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
+{ uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
+{ uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
+{ uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
+{ uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
+{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303); // round 4: same steps with x11
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
+{ uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
+{ uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
+{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
+{ uint64_t _; uint64_t x350 = _mulx_u64(x332, 0xaaaaaaaaaaaaaaabL, &_);
+{ uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xfffffffffffffffdL, &x354);
+{ uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
+{ uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
+{ uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
+{ uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x3ff, &x366);
+{ uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
+{ uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
+{ uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
+{ uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
+{ uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
+{ uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
+{ uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
+{ uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
+{ uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
+{ uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
+{ uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403); // round 5: same steps with x10, the top limb of the first operand
+{ uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
+{ uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
+{ uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
+{ uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
+{ uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
+{ uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
+{ uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
+{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
+{ uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
+{ uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
+{ uint64_t _; uint64_t x450 = _mulx_u64(x432, 0xaaaaaaaaaaaaaaabL, &_);
+{ uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xfffffffffffffffdL, &x454);
+{ uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
+{ uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
+{ uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
+{ uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x3ff, &x466);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
+{ uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
+{ uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
+{ uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
+{ uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
+{ uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
+{ uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
+{ uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
+{ uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
+{ uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xfffffffffffffffdL, &x502); // trial subtraction of 2^266 - 3 from the five-word result x486, x489, x492, x495, x498
+{ uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
+{ uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
+{ uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
+{ uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x3ff, &x514);
+{ uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_); // include the top carry word x500; only the final borrow x518 is kept
+{ uint64_t x519 = cmovznz(x518, x514, x498); // cmovznz comes from liblow.h (not shown); presumably selects the difference when x518 == 0 and the unsubtracted word otherwise -- TODO confirm
+{ uint64_t x520 = cmovznz(x518, x511, x495);
+{ uint64_t x521 = cmovznz(x518, x508, x492);
+{ uint64_t x522 = cmovznz(x518, x505, x489);
+{ uint64_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519; // most significant limb goes to out[0]
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523; // least significant limb goes to out[4]
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5]; (out[0] receives the most significant limb, out[4] the least significant)
diff --git a/src/Specific/montgomery64_2e266m3/femul.h b/src/Specific/montgomery64_2e266m3/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Defined in femul.c; each operand is five 64-bit limbs passed most-significant limb first, and five limbs are written to out.
diff --git a/src/Specific/montgomery64_2e266m3/fenz.c b/src/Specific/montgomery64_2e266m3/fenz.c
new file mode 100644
index 000000000..aaabff8a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Bitwise-ORs the five input words together; out[0] is nonzero exactly when at least one input word is nonzero.
+{ uint64_t x9 = (x8 | x7); // accumulate the OR one word at a time, with no branches
+{ uint64_t x10 = (x6 | x9);
+{ uint64_t x11 = (x4 | x10);
+{ uint64_t x12 = (x2 | x11);
+out[0] = x12; // the only word written
+}}}}
+// caller: uint64_t out[1]; (receives the OR of all five input words)
diff --git a/src/Specific/montgomery64_2e266m3/fenz.h b/src/Specific/montgomery64_2e266m3/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Defined in fenz.c; writes the bitwise OR of the five input words to out[0].
diff --git a/src/Specific/montgomery64_2e266m3/feopp.c b/src/Specific/montgomery64_2e266m3/feopp.c
new file mode 100644
index 000000000..4aa4357c9
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 minus the five-limb value (x2, x4, x6, x8, x7), least-significant limb first, then adds back 2^266 - 3 under a mask derived from the final borrow; writes out[0..4].
+{ uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10); // 0 - x2; each borrow-out feeds the next limb's subtraction
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22); // x23 is the borrow out of the top limb
+{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); // mask; cmovznz comes from liblow.h (not shown), presumably 0 when x23 == 0 and all ones otherwise -- TODO confirm
+{ uint64_t x25 = (x24 & 0xfffffffffffffffdL); // masked limbs of 2^266 - 3 (fd.., ff.., ff.., ff.., 0x3ff) are added back below
+{ uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27);
+{ uint64_t x29 = (x24 & 0xffffffffffffffffL);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
+{ uint64_t x33 = (x24 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
+{ uint64_t x37 = (x24 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
+{ uint64_t x41 = (x24 & 0x3ff);
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43); // the carry out of the top limb is discarded
+out[0] = x43; // most significant limb goes to out[0]
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // least significant limb goes to out[4]
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5]; (out[0] receives the most significant limb, out[4] the least significant)
diff --git a/src/Specific/montgomery64_2e266m3/feopp.h b/src/Specific/montgomery64_2e266m3/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Defined in feopp.c; the operand is five 64-bit limbs passed most-significant limb first, and five limbs are written to out.
diff --git a/src/Specific/montgomery64_2e266m3/fesub.c b/src/Specific/montgomery64_2e266m3/fesub.c
new file mode 100644
index 000000000..56dfeab8d
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtracts the five-limb value (x13, x15, x17, x19, x18) from (x5, x7, x9, x11, x10), least-significant limb first, then adds back 2^266 - 3 under a mask derived from the final borrow; writes out[0..4].
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21); // x5 - x13; each borrow-out feeds the next limb's subtraction
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
+{ uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
+{ uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33); // x34 is the borrow out of the top limb
+{ uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL); // mask; cmovznz comes from liblow.h (not shown), presumably 0 when x34 == 0 and all ones otherwise -- TODO confirm
+{ uint64_t x36 = (x35 & 0xfffffffffffffffdL); // masked limbs of 2^266 - 3 (fd.., ff.., ff.., ff.., 0x3ff) are added back below
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38);
+{ uint64_t x40 = (x35 & 0xffffffffffffffffL);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
+{ uint64_t x44 = (x35 & 0xffffffffffffffffL);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
+{ uint64_t x48 = (x35 & 0xffffffffffffffffL);
+{ uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
+{ uint64_t x52 = (x35 & 0x3ff);
+{ uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54); // the carry out of the top limb is discarded
+out[0] = x54; // most significant limb goes to out[0]
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38; // least significant limb goes to out[4]
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5]; (out[0] receives the most significant limb, out[4] the least significant)
diff --git a/src/Specific/montgomery64_2e266m3/fesub.h b/src/Specific/montgomery64_2e266m3/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery64_2e266m3/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Defined in fesub.c; each operand is five 64-bit limbs passed most-significant limb first (x10..x5 minus x18..x13), and five limbs are written to out.
diff --git a/src/Specific/montgomery64_2e285m9/feadd.c b/src/Specific/montgomery64_2e285m9/feadd.c
new file mode 100644
index 000000000..cc599a43a
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Adds the five-limb values (x5, x7, x9, x11, x10) and (x13, x15, x17, x19, x18), listed least-significant limb first, then conditionally subtracts 2^285 - 9; writes out[0..4].
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21); // lowest limbs first; each carry-out feeds the next limb's add
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
+{ uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33); // x34 is the carry out of the top limb
+{ uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xfffffffffffffff7L, &x36); // trial subtraction of the limbs f7.., ff.., ff.., ff.., 0x1fffffff, i.e. 2^285 - 9
+{ uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39);
+{ uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
+{ uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
+{ uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x1fffffff, &x48);
+{ uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_); // fold the add's carry x34 into the borrow chain; only the final borrow x52 is kept
+{ uint64_t x53 = cmovznz(x52, x48, x33); // cmovznz comes from liblow.h (not shown); presumably selects the difference when x52 == 0 and the raw sum otherwise -- TODO confirm
+{ uint64_t x54 = cmovznz(x52, x45, x30);
+{ uint64_t x55 = cmovznz(x52, x42, x27);
+{ uint64_t x56 = cmovznz(x52, x39, x24);
+{ uint64_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // most significant limb goes to out[0]
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57; // least significant limb goes to out[4]
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5]; (out[0] receives the most significant limb, out[4] the least significant)
diff --git a/src/Specific/montgomery64_2e285m9/feadd.h b/src/Specific/montgomery64_2e285m9/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Defined in feadd.c; each operand is five 64-bit limbs passed most-significant limb first, and five limbs are written to out.
diff --git a/src/Specific/montgomery64_2e285m9/femul.c b/src/Specific/montgomery64_2e285m9/femul.c
new file mode 100644
index 000000000..efed9d392
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
+{ uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22);
+{ uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
+{ uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
+{ uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
+{ uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
+{ uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48);
+{ uint64_t _; uint64_t x51 = _mulx_u64(x21, 0x8e38e38e38e38e39L, &_);
+{ uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xfffffffffffffff7L, &x55);
+{ uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
+{ uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
+{ uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
+{ uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x1fffffff, &x67);
+{ uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
+{ uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
+{ uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
+{ uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
+{ uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
+{ uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_);
+{ uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
+{ uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
+{ uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
+{ uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
+{ uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99);
+{ uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103);
+{ uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
+{ uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
+{ uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
+{ uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
+{ uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
+{ uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
+{ uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
+{ uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
+{ uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
+{ uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132);
+{ uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
+{ uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
+{ uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
+{ uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
+{ uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
+{ uint64_t _; uint64_t x150 = _mulx_u64(x132, 0x8e38e38e38e38e39L, &_);
+{ uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xfffffffffffffff7L, &x154);
+{ uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x1fffffff, &x166);
+{ uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
+{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
+{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
+{ uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148);
+{ uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203);
+{ uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
+{ uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
+{ uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
+{ uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
+{ uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
+{ uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
+{ uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
+{ uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
+{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
+{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
+{ uint64_t _; uint64_t x250 = _mulx_u64(x232, 0x8e38e38e38e38e39L, &_);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xfffffffffffffff7L, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
+{ uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
+{ uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
+{ uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x1fffffff, &x266);
+{ uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
+{ uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
+{ uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
+{ uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
+{ uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
+{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
+{ uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
+{ uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
+{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
+{ uint64_t _; uint64_t x350 = _mulx_u64(x332, 0x8e38e38e38e38e39L, &_);
+{ uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xfffffffffffffff7L, &x354);
+{ uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
+{ uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
+{ uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
+{ uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x1fffffff, &x366);
+{ uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
+{ uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
+{ uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
+{ uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
+{ uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
+{ uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
+{ uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
+{ uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
+{ uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
+{ uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
+{ uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403);
+{ uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
+{ uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
+{ uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
+{ uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
+{ uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
+{ uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
+{ uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
+{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
+{ uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
+{ uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
+{ uint64_t _; uint64_t x450 = _mulx_u64(x432, 0x8e38e38e38e38e39L, &_);
+{ uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xfffffffffffffff7L, &x454);
+{ uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
+{ uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
+{ uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
+{ uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x1fffffff, &x466);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
+{ uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
+{ uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
+{ uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
+{ uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
+{ uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
+{ uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
+{ uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
+{ uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
+{ uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xfffffffffffffff7L, &x502);
+{ uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
+{ uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
+{ uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
+{ uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x1fffffff, &x514);
+{ uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_);
+{ uint64_t x519 = cmovznz(x518, x514, x498);
+{ uint64_t x520 = cmovznz(x518, x511, x495);
+{ uint64_t x521 = cmovznz(x518, x508, x492);
+{ uint64_t x522 = cmovznz(x518, x505, x489);
+{ uint64_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519;
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery64_2e285m9/femul.h b/src/Specific/montgomery64_2e285m9/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Declaration only; femul.c stores out[0]..out[4] (caller supplies uint64_t out[5]). NOTE(review): always_inline is applied with no body visible to includers -- confirm the compilers in use accept calls through this prototype.
diff --git a/src/Specific/montgomery64_2e285m9/fenz.c b/src/Specific/montgomery64_2e285m9/fenz.c
new file mode 100644
index 000000000..aaabff8a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: folds the five input words together with bitwise OR; no branches or loops.
+{ uint64_t x9 = (x8 | x7);
+{ uint64_t x10 = (x6 | x9);
+{ uint64_t x11 = (x4 | x10);
+{ uint64_t x12 = (x2 | x11); // x12 == 0 exactly when all five inputs are 0
+out[0] = x12; // the only word written; any set input bit makes it nonzero
+}}}} // closes the four nested scopes opened by the "{ uint64_t ..." lines above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e285m9/fenz.h b/src/Specific/montgomery64_2e285m9/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only; fenz.c stores a single word, out[0]. NOTE(review): always_inline is applied with no body visible to includers -- confirm the compilers in use accept calls through this prototype.
diff --git a/src/Specific/montgomery64_2e285m9/feopp.c b/src/Specific/montgomery64_2e285m9/feopp.c
new file mode 100644
index 000000000..cee3ef770
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - x word by word, then adds back the mask-gated words of 2^285 - 9.
+{ uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10); // borrow chain starts at x2 (borrow-in 0), so x2 is the least significant word
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22); // x7 is the last (most significant) word; x23 is the final borrow
+{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); // mask, 0 or all-ones; presumably all-ones when x23 != 0 -- confirm against cmovznz in liblow.h
+{ uint64_t x25 = (x24 & 0xfffffffffffffff7L); // low word of 2^285 - 9 (2^64 - 9), gated by the mask
+{ uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27); // add-back carry chain starts here with carry-in 0
+{ uint64_t x29 = (x24 & 0xffffffffffffffffL); // the three middle words of 2^285 - 9 are all-ones
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
+{ uint64_t x33 = (x24 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
+{ uint64_t x37 = (x24 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
+{ uint64_t x41 = (x24 & 0x1fffffff); // top word of 2^285 - 9 (2^29 - 1)
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43); // carry out of the top word is dropped into `_`
+out[0] = x43; // most significant word is stored first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // least significant word is stored last
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery64_2e285m9/feopp.h b/src/Specific/montgomery64_2e285m9/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only; feopp.c stores out[0]..out[4] (caller supplies uint64_t out[5]). NOTE(review): always_inline is applied with no body visible to includers -- confirm the compilers in use accept calls through this prototype.
diff --git a/src/Specific/montgomery64_2e285m9/fesub.c b/src/Specific/montgomery64_2e285m9/fesub.c
new file mode 100644
index 000000000..1fe59bbdc
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtraction: (x5,x7,x9,x11,x10) minus (x13,x15,x17,x19,x18) word by word, then adds back the mask-gated words of 2^285 - 9.
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21); // borrow chain starts at the x5/x13 pair (borrow-in 0), so those are the least significant words
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
+{ uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
+{ uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33); // x10/x18 are the last (most significant) pair; x34 is the final borrow
+{ uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL); // mask, 0 or all-ones; presumably all-ones when x34 != 0 -- confirm against cmovznz in liblow.h
+{ uint64_t x36 = (x35 & 0xfffffffffffffff7L); // low word of 2^285 - 9 (2^64 - 9), gated by the mask
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38); // add-back carry chain starts here with carry-in 0
+{ uint64_t x40 = (x35 & 0xffffffffffffffffL); // the three middle words of 2^285 - 9 are all-ones
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
+{ uint64_t x44 = (x35 & 0xffffffffffffffffL);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
+{ uint64_t x48 = (x35 & 0xffffffffffffffffL);
+{ uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
+{ uint64_t x52 = (x35 & 0x1fffffff); // top word of 2^285 - 9 (2^29 - 1)
+{ uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54); // carry out of the top word is dropped into `_`
+out[0] = x54; // most significant word is stored first
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38; // least significant word is stored last
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery64_2e285m9/fesub.h b/src/Specific/montgomery64_2e285m9/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery64_2e285m9/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Declaration only; fesub.c stores out[0]..out[4] (caller supplies uint64_t out[5]). NOTE(review): always_inline is applied with no body visible to includers -- confirm the compilers in use accept calls through this prototype.
diff --git a/src/Specific/montgomery64_2e291m19/feadd.c b/src/Specific/montgomery64_2e291m19/feadd.c
new file mode 100644
index 000000000..382d7aea0
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/feadd.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Addition: (x5,x7,x9,x11,x10) plus (x13,x15,x17,x19,x18) word by word, followed by a trial subtraction of 2^291 - 19 and a per-word select.
+{ uint64_t x21; uint8_t x22 = _addcarryx_u64(0x0, x5, x13, &x21); // carry chain starts at the x5/x13 pair (carry-in 0), so those are the least significant words
+{ uint64_t x24; uint8_t x25 = _addcarryx_u64(x22, x7, x15, &x24);
+{ uint64_t x27; uint8_t x28 = _addcarryx_u64(x25, x9, x17, &x27);
+{ uint64_t x30; uint8_t x31 = _addcarryx_u64(x28, x11, x19, &x30);
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(x31, x10, x18, &x33); // x34 is the carry out of the top-word addition
+{ uint64_t x36; uint8_t x37 = _subborrow_u64(0x0, x21, 0xffffffffffffffedL, &x36); // trial subtraction of 2^291 - 19, low word (2^64 - 19) first
+{ uint64_t x39; uint8_t x40 = _subborrow_u64(x37, x24, 0xffffffffffffffffL, &x39);
+{ uint64_t x42; uint8_t x43 = _subborrow_u64(x40, x27, 0xffffffffffffffffL, &x42);
+{ uint64_t x45; uint8_t x46 = _subborrow_u64(x43, x30, 0xffffffffffffffffL, &x45);
+{ uint64_t x48; uint8_t x49 = _subborrow_u64(x46, x33, 0x7ffffffff, &x48); // top word of 2^291 - 19 (2^35 - 1)
+{ uint64_t _; uint8_t x52 = _subborrow_u64(x49, x34, 0x0, &_); // extends the borrow through the carry x34; the difference word is discarded, only the borrow x52 is kept
+{ uint64_t x53 = cmovznz(x52, x48, x33); // x52 picks, per word, between the subtracted value and the raw sum; cmovznz comes from liblow.h (selection direction not visible here)
+{ uint64_t x54 = cmovznz(x52, x45, x30);
+{ uint64_t x55 = cmovznz(x52, x42, x27);
+{ uint64_t x56 = cmovznz(x52, x39, x24);
+{ uint64_t x57 = cmovznz(x52, x36, x21);
+out[0] = x53; // most significant word is stored first
+out[1] = x54;
+out[2] = x55;
+out[3] = x56;
+out[4] = x57; // least significant word is stored last
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery64_2e291m19/feadd.h b/src/Specific/montgomery64_2e291m19/feadd.h
new file mode 100644
index 000000000..60641ee0e
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Declaration only; feadd.c stores out[0]..out[4] (caller supplies uint64_t out[5]). NOTE(review): always_inline is applied with no body visible to includers -- confirm the compilers in use accept calls through this prototype.
diff --git a/src/Specific/montgomery64_2e291m19/femul.c b/src/Specific/montgomery64_2e291m19/femul.c
new file mode 100644
index 000000000..ca888aa92
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/femul.c
@@ -0,0 +1,200 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Multiplication for the 5-word modulus 2^291 - 19 (Montgomery form, per the montgomery64_2e291m19 directory name): five multiply/accumulate/reduce rounds, then one trial subtraction with a per-word select.
+{ uint64_t x22; uint64_t x21 = _mulx_u64(x5, x13, &x22); // round 1: x5 times each of x13, x15, x17, x19, x18 (low half returned, high half stored through the pointer)
+{ uint64_t x25; uint64_t x24 = _mulx_u64(x5, x15, &x25);
+{ uint64_t x28; uint64_t x27 = _mulx_u64(x5, x17, &x28);
+{ uint64_t x31; uint64_t x30 = _mulx_u64(x5, x19, &x31);
+{ uint64_t x34; uint64_t x33 = _mulx_u64(x5, x18, &x34);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(0x0, x22, x24, &x36); // column alignment: the high half of each product is added to the low half of the next one
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x25, x27, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x28, x30, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x31, x33, &x45);
+{ uint64_t x48; uint8_t _ = _addcarryx_u64(0x0, x46, x34, &x48); // top word of the row: last carry plus the high half of x5*x18
+{ uint64_t _; uint64_t x51 = _mulx_u64(x21, 0x86bca1af286bca1bL, &_); // reduction factor: 19 * 0x86bca1af286bca1b == 1 (mod 2^64); only the low half of the product is kept
+{ uint64_t x55; uint64_t x54 = _mulx_u64(x51, 0xffffffffffffffedL, &x55); // x51 times the words of 2^291 - 19, low word (2^64 - 19) first
+{ uint64_t x58; uint64_t x57 = _mulx_u64(x51, 0xffffffffffffffffL, &x58);
+{ uint64_t x61; uint64_t x60 = _mulx_u64(x51, 0xffffffffffffffffL, &x61);
+{ uint64_t x64; uint64_t x63 = _mulx_u64(x51, 0xffffffffffffffffL, &x64);
+{ uint64_t x67; uint64_t x66 = _mulx_u64(x51, 0x7ffffffff, &x67); // top word of 2^291 - 19 (2^35 - 1)
+{ uint64_t x69; uint8_t x70 = _addcarryx_u64(0x0, x55, x57, &x69);
+{ uint64_t x72; uint8_t x73 = _addcarryx_u64(x70, x58, x60, &x72);
+{ uint64_t x75; uint8_t x76 = _addcarryx_u64(x73, x61, x63, &x75);
+{ uint64_t x78; uint8_t x79 = _addcarryx_u64(x76, x64, x66, &x78);
+{ uint64_t x81; uint8_t _ = _addcarryx_u64(0x0, x79, x67, &x81);
+{ uint64_t _; uint8_t x85 = _addcarryx_u64(0x0, x21, x54, &_); // lowest sum word is discarded (written to `_`); only its carry x85 continues
+{ uint64_t x87; uint8_t x88 = _addcarryx_u64(x85, x36, x69, &x87);
+{ uint64_t x90; uint8_t x91 = _addcarryx_u64(x88, x39, x72, &x90);
+{ uint64_t x93; uint8_t x94 = _addcarryx_u64(x91, x42, x75, &x93);
+{ uint64_t x96; uint8_t x97 = _addcarryx_u64(x94, x45, x78, &x96);
+{ uint64_t x99; uint8_t x100 = _addcarryx_u64(x97, x48, x81, &x99); // x87..x99 plus carry x100 form the accumulator handed to round 2
+{ uint64_t x103; uint64_t x102 = _mulx_u64(x7, x13, &x103); // round 2: x7 times each of x13, x15, x17, x19, x18
+{ uint64_t x106; uint64_t x105 = _mulx_u64(x7, x15, &x106);
+{ uint64_t x109; uint64_t x108 = _mulx_u64(x7, x17, &x109);
+{ uint64_t x112; uint64_t x111 = _mulx_u64(x7, x19, &x112);
+{ uint64_t x115; uint64_t x114 = _mulx_u64(x7, x18, &x115);
+{ uint64_t x117; uint8_t x118 = _addcarryx_u64(0x0, x103, x105, &x117);
+{ uint64_t x120; uint8_t x121 = _addcarryx_u64(x118, x106, x108, &x120);
+{ uint64_t x123; uint8_t x124 = _addcarryx_u64(x121, x109, x111, &x123);
+{ uint64_t x126; uint8_t x127 = _addcarryx_u64(x124, x112, x114, &x126);
+{ uint64_t x129; uint8_t _ = _addcarryx_u64(0x0, x127, x115, &x129);
+{ uint64_t x132; uint8_t x133 = _addcarryx_u64(0x0, x87, x102, &x132); // add this product row to the accumulator left by round 1
+{ uint64_t x135; uint8_t x136 = _addcarryx_u64(x133, x90, x117, &x135);
+{ uint64_t x138; uint8_t x139 = _addcarryx_u64(x136, x93, x120, &x138);
+{ uint64_t x141; uint8_t x142 = _addcarryx_u64(x139, x96, x123, &x141);
+{ uint64_t x144; uint8_t x145 = _addcarryx_u64(x142, x99, x126, &x144);
+{ uint64_t x147; uint8_t x148 = _addcarryx_u64(x145, x100, x129, &x147);
+{ uint64_t _; uint64_t x150 = _mulx_u64(x132, 0x86bca1af286bca1bL, &_); // same reduction step as in round 1, now driven by x132
+{ uint64_t x154; uint64_t x153 = _mulx_u64(x150, 0xffffffffffffffedL, &x154);
+{ uint64_t x157; uint64_t x156 = _mulx_u64(x150, 0xffffffffffffffffL, &x157);
+{ uint64_t x160; uint64_t x159 = _mulx_u64(x150, 0xffffffffffffffffL, &x160);
+{ uint64_t x163; uint64_t x162 = _mulx_u64(x150, 0xffffffffffffffffL, &x163);
+{ uint64_t x166; uint64_t x165 = _mulx_u64(x150, 0x7ffffffff, &x166);
+{ uint64_t x168; uint8_t x169 = _addcarryx_u64(0x0, x154, x156, &x168);
+{ uint64_t x171; uint8_t x172 = _addcarryx_u64(x169, x157, x159, &x171);
+{ uint64_t x174; uint8_t x175 = _addcarryx_u64(x172, x160, x162, &x174);
+{ uint64_t x177; uint8_t x178 = _addcarryx_u64(x175, x163, x165, &x177);
+{ uint64_t x180; uint8_t _ = _addcarryx_u64(0x0, x178, x166, &x180);
+{ uint64_t _; uint8_t x184 = _addcarryx_u64(0x0, x132, x153, &_);
+{ uint64_t x186; uint8_t x187 = _addcarryx_u64(x184, x135, x168, &x186);
+{ uint64_t x189; uint8_t x190 = _addcarryx_u64(x187, x138, x171, &x189);
+{ uint64_t x192; uint8_t x193 = _addcarryx_u64(x190, x141, x174, &x192);
+{ uint64_t x195; uint8_t x196 = _addcarryx_u64(x193, x144, x177, &x195);
+{ uint64_t x198; uint8_t x199 = _addcarryx_u64(x196, x147, x180, &x198);
+{ uint8_t x200 = (x199 + x148); // the two carry-outs of this round are summed into the extra top word
+{ uint64_t x203; uint64_t x202 = _mulx_u64(x9, x13, &x203); // round 3: x9 times each of x13, x15, x17, x19, x18
+{ uint64_t x206; uint64_t x205 = _mulx_u64(x9, x15, &x206);
+{ uint64_t x209; uint64_t x208 = _mulx_u64(x9, x17, &x209);
+{ uint64_t x212; uint64_t x211 = _mulx_u64(x9, x19, &x212);
+{ uint64_t x215; uint64_t x214 = _mulx_u64(x9, x18, &x215);
+{ uint64_t x217; uint8_t x218 = _addcarryx_u64(0x0, x203, x205, &x217);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x206, x208, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x209, x211, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x212, x214, &x226);
+{ uint64_t x229; uint8_t _ = _addcarryx_u64(0x0, x227, x215, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(0x0, x186, x202, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x189, x217, &x235);
+{ uint64_t x238; uint8_t x239 = _addcarryx_u64(x236, x192, x220, &x238);
+{ uint64_t x241; uint8_t x242 = _addcarryx_u64(x239, x195, x223, &x241);
+{ uint64_t x244; uint8_t x245 = _addcarryx_u64(x242, x198, x226, &x244);
+{ uint64_t x247; uint8_t x248 = _addcarryx_u64(x245, x200, x229, &x247);
+{ uint64_t _; uint64_t x250 = _mulx_u64(x232, 0x86bca1af286bca1bL, &_);
+{ uint64_t x254; uint64_t x253 = _mulx_u64(x250, 0xffffffffffffffedL, &x254);
+{ uint64_t x257; uint64_t x256 = _mulx_u64(x250, 0xffffffffffffffffL, &x257);
+{ uint64_t x260; uint64_t x259 = _mulx_u64(x250, 0xffffffffffffffffL, &x260);
+{ uint64_t x263; uint64_t x262 = _mulx_u64(x250, 0xffffffffffffffffL, &x263);
+{ uint64_t x266; uint64_t x265 = _mulx_u64(x250, 0x7ffffffff, &x266);
+{ uint64_t x268; uint8_t x269 = _addcarryx_u64(0x0, x254, x256, &x268);
+{ uint64_t x271; uint8_t x272 = _addcarryx_u64(x269, x257, x259, &x271);
+{ uint64_t x274; uint8_t x275 = _addcarryx_u64(x272, x260, x262, &x274);
+{ uint64_t x277; uint8_t x278 = _addcarryx_u64(x275, x263, x265, &x277);
+{ uint64_t x280; uint8_t _ = _addcarryx_u64(0x0, x278, x266, &x280);
+{ uint64_t _; uint8_t x284 = _addcarryx_u64(0x0, x232, x253, &_);
+{ uint64_t x286; uint8_t x287 = _addcarryx_u64(x284, x235, x268, &x286);
+{ uint64_t x289; uint8_t x290 = _addcarryx_u64(x287, x238, x271, &x289);
+{ uint64_t x292; uint8_t x293 = _addcarryx_u64(x290, x241, x274, &x292);
+{ uint64_t x295; uint8_t x296 = _addcarryx_u64(x293, x244, x277, &x295);
+{ uint64_t x298; uint8_t x299 = _addcarryx_u64(x296, x247, x280, &x298);
+{ uint8_t x300 = (x299 + x248);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x11, x13, &x303); // round 4: x11 times each of x13, x15, x17, x19, x18
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x11, x15, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x11, x17, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x11, x19, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x11, x18, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x303, x305, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x306, x308, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x309, x311, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x312, x314, &x326);
+{ uint64_t x329; uint8_t _ = _addcarryx_u64(0x0, x327, x315, &x329);
+{ uint64_t x332; uint8_t x333 = _addcarryx_u64(0x0, x286, x302, &x332);
+{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x289, x317, &x335);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x292, x320, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x295, x323, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x298, x326, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x300, x329, &x347);
+{ uint64_t _; uint64_t x350 = _mulx_u64(x332, 0x86bca1af286bca1bL, &_);
+{ uint64_t x354; uint64_t x353 = _mulx_u64(x350, 0xffffffffffffffedL, &x354);
+{ uint64_t x357; uint64_t x356 = _mulx_u64(x350, 0xffffffffffffffffL, &x357);
+{ uint64_t x360; uint64_t x359 = _mulx_u64(x350, 0xffffffffffffffffL, &x360);
+{ uint64_t x363; uint64_t x362 = _mulx_u64(x350, 0xffffffffffffffffL, &x363);
+{ uint64_t x366; uint64_t x365 = _mulx_u64(x350, 0x7ffffffff, &x366);
+{ uint64_t x368; uint8_t x369 = _addcarryx_u64(0x0, x354, x356, &x368);
+{ uint64_t x371; uint8_t x372 = _addcarryx_u64(x369, x357, x359, &x371);
+{ uint64_t x374; uint8_t x375 = _addcarryx_u64(x372, x360, x362, &x374);
+{ uint64_t x377; uint8_t x378 = _addcarryx_u64(x375, x363, x365, &x377);
+{ uint64_t x380; uint8_t _ = _addcarryx_u64(0x0, x378, x366, &x380);
+{ uint64_t _; uint8_t x384 = _addcarryx_u64(0x0, x332, x353, &_);
+{ uint64_t x386; uint8_t x387 = _addcarryx_u64(x384, x335, x368, &x386);
+{ uint64_t x389; uint8_t x390 = _addcarryx_u64(x387, x338, x371, &x389);
+{ uint64_t x392; uint8_t x393 = _addcarryx_u64(x390, x341, x374, &x392);
+{ uint64_t x395; uint8_t x396 = _addcarryx_u64(x393, x344, x377, &x395);
+{ uint64_t x398; uint8_t x399 = _addcarryx_u64(x396, x347, x380, &x398);
+{ uint8_t x400 = (x399 + x348);
+{ uint64_t x403; uint64_t x402 = _mulx_u64(x10, x13, &x403); // round 5: x10 times each of x13, x15, x17, x19, x18
+{ uint64_t x406; uint64_t x405 = _mulx_u64(x10, x15, &x406);
+{ uint64_t x409; uint64_t x408 = _mulx_u64(x10, x17, &x409);
+{ uint64_t x412; uint64_t x411 = _mulx_u64(x10, x19, &x412);
+{ uint64_t x415; uint64_t x414 = _mulx_u64(x10, x18, &x415);
+{ uint64_t x417; uint8_t x418 = _addcarryx_u64(0x0, x403, x405, &x417);
+{ uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
+{ uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
+{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x414, &x426);
+{ uint64_t x429; uint8_t _ = _addcarryx_u64(0x0, x427, x415, &x429);
+{ uint64_t x432; uint8_t x433 = _addcarryx_u64(0x0, x386, x402, &x432);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x389, x417, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x392, x420, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x395, x423, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x398, x426, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x400, x429, &x447);
+{ uint64_t _; uint64_t x450 = _mulx_u64(x432, 0x86bca1af286bca1bL, &_);
+{ uint64_t x454; uint64_t x453 = _mulx_u64(x450, 0xffffffffffffffedL, &x454);
+{ uint64_t x457; uint64_t x456 = _mulx_u64(x450, 0xffffffffffffffffL, &x457);
+{ uint64_t x460; uint64_t x459 = _mulx_u64(x450, 0xffffffffffffffffL, &x460);
+{ uint64_t x463; uint64_t x462 = _mulx_u64(x450, 0xffffffffffffffffL, &x463);
+{ uint64_t x466; uint64_t x465 = _mulx_u64(x450, 0x7ffffffff, &x466);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(0x0, x454, x456, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x457, x459, &x471);
+{ uint64_t x474; uint8_t x475 = _addcarryx_u64(x472, x460, x462, &x474);
+{ uint64_t x477; uint8_t x478 = _addcarryx_u64(x475, x463, x465, &x477);
+{ uint64_t x480; uint8_t _ = _addcarryx_u64(0x0, x478, x466, &x480);
+{ uint64_t _; uint8_t x484 = _addcarryx_u64(0x0, x432, x453, &_);
+{ uint64_t x486; uint8_t x487 = _addcarryx_u64(x484, x435, x468, &x486);
+{ uint64_t x489; uint8_t x490 = _addcarryx_u64(x487, x438, x471, &x489);
+{ uint64_t x492; uint8_t x493 = _addcarryx_u64(x490, x441, x474, &x492);
+{ uint64_t x495; uint8_t x496 = _addcarryx_u64(x493, x444, x477, &x495);
+{ uint64_t x498; uint8_t x499 = _addcarryx_u64(x496, x447, x480, &x498);
+{ uint8_t x500 = (x499 + x448);
+{ uint64_t x502; uint8_t x503 = _subborrow_u64(0x0, x486, 0xffffffffffffffedL, &x502); // trial subtraction of 2^291 - 19 from the final accumulator x486..x498
+{ uint64_t x505; uint8_t x506 = _subborrow_u64(x503, x489, 0xffffffffffffffffL, &x505);
+{ uint64_t x508; uint8_t x509 = _subborrow_u64(x506, x492, 0xffffffffffffffffL, &x508);
+{ uint64_t x511; uint8_t x512 = _subborrow_u64(x509, x495, 0xffffffffffffffffL, &x511);
+{ uint64_t x514; uint8_t x515 = _subborrow_u64(x512, x498, 0x7ffffffff, &x514);
+{ uint64_t _; uint8_t x518 = _subborrow_u64(x515, x500, 0x0, &_); // extends the borrow through the extra top word x500; only the borrow x518 is kept
+{ uint64_t x519 = cmovznz(x518, x514, x498); // x518 picks, per word, between the subtracted value and the accumulator; cmovznz comes from liblow.h (selection direction not visible here)
+{ uint64_t x520 = cmovznz(x518, x511, x495);
+{ uint64_t x521 = cmovznz(x518, x508, x492);
+{ uint64_t x522 = cmovznz(x518, x505, x489);
+{ uint64_t x523 = cmovznz(x518, x502, x486);
+out[0] = x519; // most significant word is stored first
+out[1] = x520;
+out[2] = x521;
+out[3] = x522;
+out[4] = x523; // least significant word is stored last
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery64_2e291m19/femul.h b/src/Specific/montgomery64_2e291m19/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Declaration only; femul.c stores out[0]..out[4] (caller supplies uint64_t out[5]). NOTE(review): always_inline is applied with no body visible to includers -- confirm the compilers in use accept calls through this prototype.
diff --git a/src/Specific/montgomery64_2e291m19/fenz.c b/src/Specific/montgomery64_2e291m19/fenz.c
new file mode 100644
index 000000000..aaabff8a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/fenz.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: folds the five input words together with bitwise OR; no branches or loops.
+{ uint64_t x9 = (x8 | x7);
+{ uint64_t x10 = (x6 | x9);
+{ uint64_t x11 = (x4 | x10);
+{ uint64_t x12 = (x2 | x11); // x12 == 0 exactly when all five inputs are 0
+out[0] = x12; // the only word written; any set input bit makes it nonzero
+}}}} // closes the four nested scopes opened by the "{ uint64_t ..." lines above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e291m19/fenz.h b/src/Specific/montgomery64_2e291m19/fenz.h
new file mode 100644
index 000000000..7bece2f06
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only; fenz.c stores a single word, out[0]. NOTE(review): always_inline is applied with no body visible to includers -- confirm the compilers in use accept calls through this prototype.
diff --git a/src/Specific/montgomery64_2e291m19/feopp.c b/src/Specific/montgomery64_2e291m19/feopp.c
new file mode 100644
index 000000000..34d8efd27
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/feopp.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - x word by word, then adds back the mask-gated words of 2^291 - 19.
+{ uint64_t x10; uint8_t x11 = _subborrow_u64(0x0, 0x0, x2, &x10); // borrow chain starts at x2 (borrow-in 0), so x2 is the least significant word
+{ uint64_t x13; uint8_t x14 = _subborrow_u64(x11, 0x0, x4, &x13);
+{ uint64_t x16; uint8_t x17 = _subborrow_u64(x14, 0x0, x6, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u64(x17, 0x0, x8, &x19);
+{ uint64_t x22; uint8_t x23 = _subborrow_u64(x20, 0x0, x7, &x22); // x7 is the last (most significant) word; x23 is the final borrow
+{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); // mask, 0 or all-ones; presumably all-ones when x23 != 0 -- confirm against cmovznz in liblow.h
+{ uint64_t x25 = (x24 & 0xffffffffffffffedL); // low word of 2^291 - 19 (2^64 - 19), gated by the mask
+{ uint64_t x27; uint8_t x28 = _addcarryx_u64(0x0, x10, x25, &x27); // add-back carry chain starts here with carry-in 0
+{ uint64_t x29 = (x24 & 0xffffffffffffffffL); // the three middle words of 2^291 - 19 are all-ones
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x28, x13, x29, &x31);
+{ uint64_t x33 = (x24 & 0xffffffffffffffffL);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x32, x16, x33, &x35);
+{ uint64_t x37 = (x24 & 0xffffffffffffffffL);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x36, x19, x37, &x39);
+{ uint64_t x41 = (x24 & 0x7ffffffff); // top word of 2^291 - 19 (2^35 - 1)
+{ uint64_t x43; uint8_t _ = _addcarryx_u64(x40, x22, x41, &x43); // carry out of the top word is dropped into `_`
+out[0] = x43; // most significant word is stored first
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // least significant word is stored last
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery64_2e291m19/feopp.h b/src/Specific/montgomery64_2e291m19/feopp.h
new file mode 100644
index 000000000..ed4bf7238
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only; feopp.c stores out[0]..out[4] (caller supplies uint64_t out[5]). NOTE(review): always_inline is applied with no body visible to includers -- confirm the compilers in use accept calls through this prototype.
diff --git a/src/Specific/montgomery64_2e291m19/fesub.c b/src/Specific/montgomery64_2e291m19/fesub.c
new file mode 100644
index 000000000..e45349aea
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/fesub.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Subtraction: (x5,x7,x9,x11,x10) minus (x13,x15,x17,x19,x18) word by word, then adds back the mask-gated words of 2^291 - 19.
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(0x0, x5, x13, &x21); // borrow chain starts at the x5/x13 pair (borrow-in 0), so those are the least significant words
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, x7, x15, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, x9, x17, &x27);
+{ uint64_t x30; uint8_t x31 = _subborrow_u64(x28, x11, x19, &x30);
+{ uint64_t x33; uint8_t x34 = _subborrow_u64(x31, x10, x18, &x33); // x10/x18 are the last (most significant) pair; x34 is the final borrow
+{ uint64_t x35 = (uint64_t)cmovznz(x34, 0x0, 0xffffffffffffffffL); // mask, 0 or all-ones; presumably all-ones when x34 != 0 -- confirm against cmovznz in liblow.h
+{ uint64_t x36 = (x35 & 0xffffffffffffffedL); // low word of 2^291 - 19 (2^64 - 19), gated by the mask
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(0x0, x21, x36, &x38); // add-back carry chain starts here with carry-in 0
+{ uint64_t x40 = (x35 & 0xffffffffffffffffL); // the three middle words of 2^291 - 19 are all-ones
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x39, x24, x40, &x42);
+{ uint64_t x44 = (x35 & 0xffffffffffffffffL);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x43, x27, x44, &x46);
+{ uint64_t x48 = (x35 & 0xffffffffffffffffL);
+{ uint64_t x50; uint8_t x51 = _addcarryx_u64(x47, x30, x48, &x50);
+{ uint64_t x52 = (x35 & 0x7ffffffff); // top word of 2^291 - 19 (2^35 - 1)
+{ uint64_t x54; uint8_t _ = _addcarryx_u64(x51, x33, x52, &x54); // carry out of the top word is dropped into `_`
+out[0] = x54; // most significant word is stored first
+out[1] = x50;
+out[2] = x46;
+out[3] = x42;
+out[4] = x38; // least significant word is stored last
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/montgomery64_2e291m19/fesub.h b/src/Specific/montgomery64_2e291m19/fesub.h
new file mode 100644
index 000000000..d7d854c9c
--- /dev/null
+++ b/src/Specific/montgomery64_2e291m19/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype for the five-limb subtraction of the montgomery64_2e291m19 directory: the first five limb arguments are the minuend, the last five the subtrahend; the definition's note says the caller provides uint64_t out[5].
diff --git a/src/Specific/montgomery64_2e321m9/feadd.c b/src/Specific/montgomery64_2e321m9/feadd.c
new file mode 100644
index 000000000..28d3c78e3
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb addition followed by a trial subtraction of 2^321 - 9. First operand limbs are x12,x13,x11,x9,x7,x5 and second operand limbs are x22,x23,x21,x19,x17,x15, each listed most-significant first (the carry chain below starts at x5/x15).
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // lowest limb: x5 + x15 with no carry-in; x26 is the carry-out
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28); // each following limb consumes the previous carry
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // top limb; x41 is the carry out of the 384-bit sum
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffff7L, &x43); // trial subtraction of the modulus, lowest limb 2^64 - 9 first
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46); // middle modulus limbs are all-ones
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x1, &x58); // top modulus limb is 1 (321 = 5*64 + 1)
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // extend the borrow through the addition's carry-out; only the final borrow x62 is kept
+{ uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz comes from liblow.h (not shown here) - presumably picks the second argument when x62 == 0 and the third otherwise, i.e. the unsubtracted sum on borrow; TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // results are written most-significant limb first
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // least-significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e321m9/feadd.h b/src/Specific/montgomery64_2e321m9/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for the six-limb addition of the montgomery64_2e321m9 directory: the first six limb arguments are one operand, the last six the other; the definition's note says the caller provides uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e321m9/femul.c b/src/Specific/montgomery64_2e321m9/femul.c
new file mode 100644
index 000000000..07a92880a
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/femul.c
@@ -0,0 +1,260 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Fully unrolled word-by-word Montgomery multiplication for the modulus 2^321 - 9 with 64-bit limbs. First operand limbs are x12,x13,x11,x9,x7,x5 and second operand limbs are x22,x23,x21,x19,x17,x15, each listed most-significant first. Six rounds follow, one per first-operand limb (x5, x7, x9, x11, x13, x12), then a final trial subtraction of the modulus.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1: x5 times every second-operand limb; _mulx_u64 returns the low half and stores the high half through the pointer
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // fold each high half into the next low half, giving the seven-word row x25,x43,x46,x49,x52,x55,x58
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // top word of the row: last high half plus the pending carry
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8e38e38e38e38e39L, &_); // reduction factor for this round: low word times 0x8e38e38e38e38e39, which is the inverse of 9 modulo 2^64 (equivalently -(2^321 - 9)^-1 mod 2^64); the high half is unused
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffff7L, &x65); // x61 times the modulus limbs: lowest limb 2^64 - 9 ...
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68); // ... then four all-ones limbs
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x65, x67, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x68, x70, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x71, x73, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x74, x76, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x77, x61, &x91); // the top modulus limb is 1, so x61 itself is that limb's product
+{ uint64_t _; uint8_t x95 = _addcarryx_u64(0x0, x25, x64, &_); // add the reduction row to the product row; the lowest word is a multiple of 2^64 by the choice of x61, so only its carry is kept
+{ uint64_t x97; uint8_t x98 = _addcarryx_u64(x95, x43, x79, &x97);
+{ uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x46, x82, &x100);
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x49, x85, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x52, x88, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x55, x91, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x58, x92, &x112); // accumulator after round 1: x97..x112 plus carry x113
+{ uint64_t x116; uint64_t x115 = _mulx_u64(x7, x15, &x116); // round 2: x7 times every second-operand limb
+{ uint64_t x119; uint64_t x118 = _mulx_u64(x7, x17, &x119);
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x19, &x122);
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x21, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x23, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x22, &x131);
+{ uint64_t x133; uint8_t x134 = _addcarryx_u64(0x0, x116, x118, &x133);
+{ uint64_t x136; uint8_t x137 = _addcarryx_u64(x134, x119, x121, &x136);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t _ = _addcarryx_u64(0x0, x146, x131, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(0x0, x97, x115, &x151); // add this round's product row to the running accumulator
+{ uint64_t x154; uint8_t x155 = _addcarryx_u64(x152, x100, x133, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x113, x148, &x169);
+{ uint64_t _; uint64_t x172 = _mulx_u64(x151, 0x8e38e38e38e38e39L, &_); // reduction factor for round 2
+{ uint64_t x176; uint64_t x175 = _mulx_u64(x172, 0xfffffffffffffff7L, &x176);
+{ uint64_t x179; uint64_t x178 = _mulx_u64(x172, 0xffffffffffffffffL, &x179);
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x172, 0xffffffffffffffffL, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x172, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x172, 0xffffffffffffffffL, &x188);
+{ uint64_t x190; uint8_t x191 = _addcarryx_u64(0x0, x176, x178, &x190);
+{ uint64_t x193; uint8_t x194 = _addcarryx_u64(x191, x179, x181, &x193);
+{ uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x182, x184, &x196);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x185, x187, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x188, x172, &x202);
+{ uint64_t _; uint8_t x206 = _addcarryx_u64(0x0, x151, x175, &_); // lowest word cancels again; only the carry is kept
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x154, x190, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x157, x193, &x211);
+{ uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x160, x196, &x214);
+{ uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x163, x199, &x217);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x166, x202, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x169, x203, &x223);
+{ uint8_t x225 = (x224 + x170); // merge the carries of the two additions into the accumulator's top word
+{ uint64_t x228; uint64_t x227 = _mulx_u64(x9, x15, &x228); // round 3: x9 times every second-operand limb
+{ uint64_t x231; uint64_t x230 = _mulx_u64(x9, x17, &x231);
+{ uint64_t x234; uint64_t x233 = _mulx_u64(x9, x19, &x234);
+{ uint64_t x237; uint64_t x236 = _mulx_u64(x9, x21, &x237);
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x23, &x240);
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x22, &x243);
+{ uint64_t x245; uint8_t x246 = _addcarryx_u64(0x0, x228, x230, &x245);
+{ uint64_t x248; uint8_t x249 = _addcarryx_u64(x246, x231, x233, &x248);
+{ uint64_t x251; uint8_t x252 = _addcarryx_u64(x249, x234, x236, &x251);
+{ uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
+{ uint64_t x260; uint8_t _ = _addcarryx_u64(0x0, x258, x243, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(0x0, x208, x227, &x263); // add this round's product row to the running accumulator
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x211, x245, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x214, x248, &x269);
+{ uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x225, x260, &x281);
+{ uint64_t _; uint64_t x284 = _mulx_u64(x263, 0x8e38e38e38e38e39L, &_); // reduction factor for round 3
+{ uint64_t x288; uint64_t x287 = _mulx_u64(x284, 0xfffffffffffffff7L, &x288);
+{ uint64_t x291; uint64_t x290 = _mulx_u64(x284, 0xffffffffffffffffL, &x291);
+{ uint64_t x294; uint64_t x293 = _mulx_u64(x284, 0xffffffffffffffffL, &x294);
+{ uint64_t x297; uint64_t x296 = _mulx_u64(x284, 0xffffffffffffffffL, &x297);
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x284, 0xffffffffffffffffL, &x300);
+{ uint64_t x302; uint8_t x303 = _addcarryx_u64(0x0, x288, x290, &x302);
+{ uint64_t x305; uint8_t x306 = _addcarryx_u64(x303, x291, x293, &x305);
+{ uint64_t x308; uint8_t x309 = _addcarryx_u64(x306, x294, x296, &x308);
+{ uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x297, x299, &x311);
+{ uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x300, x284, &x314);
+{ uint64_t _; uint8_t x318 = _addcarryx_u64(0x0, x263, x287, &_);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x266, x302, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x269, x305, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x272, x308, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x275, x311, &x329);
+{ uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x278, x314, &x332);
+{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x281, x315, &x335);
+{ uint8_t x337 = (x336 + x282);
+{ uint64_t x340; uint64_t x339 = _mulx_u64(x11, x15, &x340); // round 4: x11 times every second-operand limb
+{ uint64_t x343; uint64_t x342 = _mulx_u64(x11, x17, &x343);
+{ uint64_t x346; uint64_t x345 = _mulx_u64(x11, x19, &x346);
+{ uint64_t x349; uint64_t x348 = _mulx_u64(x11, x21, &x349);
+{ uint64_t x352; uint64_t x351 = _mulx_u64(x11, x23, &x352);
+{ uint64_t x355; uint64_t x354 = _mulx_u64(x11, x22, &x355);
+{ uint64_t x357; uint8_t x358 = _addcarryx_u64(0x0, x340, x342, &x357);
+{ uint64_t x360; uint8_t x361 = _addcarryx_u64(x358, x343, x345, &x360);
+{ uint64_t x363; uint8_t x364 = _addcarryx_u64(x361, x346, x348, &x363);
+{ uint64_t x366; uint8_t x367 = _addcarryx_u64(x364, x349, x351, &x366);
+{ uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
+{ uint64_t x372; uint8_t _ = _addcarryx_u64(0x0, x370, x355, &x372);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x320, x339, &x375); // add this round's product row to the running accumulator
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x323, x357, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x326, x360, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x329, x363, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
+{ uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x337, x372, &x393);
+{ uint64_t _; uint64_t x396 = _mulx_u64(x375, 0x8e38e38e38e38e39L, &_); // reduction factor for round 4
+{ uint64_t x400; uint64_t x399 = _mulx_u64(x396, 0xfffffffffffffff7L, &x400);
+{ uint64_t x403; uint64_t x402 = _mulx_u64(x396, 0xffffffffffffffffL, &x403);
+{ uint64_t x406; uint64_t x405 = _mulx_u64(x396, 0xffffffffffffffffL, &x406);
+{ uint64_t x409; uint64_t x408 = _mulx_u64(x396, 0xffffffffffffffffL, &x409);
+{ uint64_t x412; uint64_t x411 = _mulx_u64(x396, 0xffffffffffffffffL, &x412);
+{ uint64_t x414; uint8_t x415 = _addcarryx_u64(0x0, x400, x402, &x414);
+{ uint64_t x417; uint8_t x418 = _addcarryx_u64(x415, x403, x405, &x417);
+{ uint64_t x420; uint8_t x421 = _addcarryx_u64(x418, x406, x408, &x420);
+{ uint64_t x423; uint8_t x424 = _addcarryx_u64(x421, x409, x411, &x423);
+{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x412, x396, &x426);
+{ uint64_t _; uint8_t x430 = _addcarryx_u64(0x0, x375, x399, &_);
+{ uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x378, x414, &x432);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x381, x417, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x384, x420, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x387, x423, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x390, x426, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x393, x427, &x447);
+{ uint8_t x449 = (x448 + x394);
+{ uint64_t x452; uint64_t x451 = _mulx_u64(x13, x15, &x452); // round 5: x13 times every second-operand limb
+{ uint64_t x455; uint64_t x454 = _mulx_u64(x13, x17, &x455);
+{ uint64_t x458; uint64_t x457 = _mulx_u64(x13, x19, &x458);
+{ uint64_t x461; uint64_t x460 = _mulx_u64(x13, x21, &x461);
+{ uint64_t x464; uint64_t x463 = _mulx_u64(x13, x23, &x464);
+{ uint64_t x467; uint64_t x466 = _mulx_u64(x13, x22, &x467);
+{ uint64_t x469; uint8_t x470 = _addcarryx_u64(0x0, x452, x454, &x469);
+{ uint64_t x472; uint8_t x473 = _addcarryx_u64(x470, x455, x457, &x472);
+{ uint64_t x475; uint8_t x476 = _addcarryx_u64(x473, x458, x460, &x475);
+{ uint64_t x478; uint8_t x479 = _addcarryx_u64(x476, x461, x463, &x478);
+{ uint64_t x481; uint8_t x482 = _addcarryx_u64(x479, x464, x466, &x481);
+{ uint64_t x484; uint8_t _ = _addcarryx_u64(0x0, x482, x467, &x484);
+{ uint64_t x487; uint8_t x488 = _addcarryx_u64(0x0, x432, x451, &x487); // add this round's product row to the running accumulator
+{ uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x435, x469, &x490);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x438, x472, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x441, x475, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x444, x478, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x449, x484, &x505);
+{ uint64_t _; uint64_t x508 = _mulx_u64(x487, 0x8e38e38e38e38e39L, &_); // reduction factor for round 5
+{ uint64_t x512; uint64_t x511 = _mulx_u64(x508, 0xfffffffffffffff7L, &x512);
+{ uint64_t x515; uint64_t x514 = _mulx_u64(x508, 0xffffffffffffffffL, &x515);
+{ uint64_t x518; uint64_t x517 = _mulx_u64(x508, 0xffffffffffffffffL, &x518);
+{ uint64_t x521; uint64_t x520 = _mulx_u64(x508, 0xffffffffffffffffL, &x521);
+{ uint64_t x524; uint64_t x523 = _mulx_u64(x508, 0xffffffffffffffffL, &x524);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(0x0, x512, x514, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x515, x517, &x529);
+{ uint64_t x532; uint8_t x533 = _addcarryx_u64(x530, x518, x520, &x532);
+{ uint64_t x535; uint8_t x536 = _addcarryx_u64(x533, x521, x523, &x535);
+{ uint64_t x538; uint8_t x539 = _addcarryx_u64(x536, x524, x508, &x538);
+{ uint64_t _; uint8_t x542 = _addcarryx_u64(0x0, x487, x511, &_);
+{ uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x490, x526, &x544);
+{ uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x493, x529, &x547);
+{ uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x496, x532, &x550);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(x551, x499, x535, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x502, x538, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x505, x539, &x559);
+{ uint8_t x561 = (x560 + x506);
+{ uint64_t x564; uint64_t x563 = _mulx_u64(x12, x15, &x564); // round 6: x12 (top limb of the first operand) times every second-operand limb
+{ uint64_t x567; uint64_t x566 = _mulx_u64(x12, x17, &x567);
+{ uint64_t x570; uint64_t x569 = _mulx_u64(x12, x19, &x570);
+{ uint64_t x573; uint64_t x572 = _mulx_u64(x12, x21, &x573);
+{ uint64_t x576; uint64_t x575 = _mulx_u64(x12, x23, &x576);
+{ uint64_t x579; uint64_t x578 = _mulx_u64(x12, x22, &x579);
+{ uint64_t x581; uint8_t x582 = _addcarryx_u64(0x0, x564, x566, &x581);
+{ uint64_t x584; uint8_t x585 = _addcarryx_u64(x582, x567, x569, &x584);
+{ uint64_t x587; uint8_t x588 = _addcarryx_u64(x585, x570, x572, &x587);
+{ uint64_t x590; uint8_t x591 = _addcarryx_u64(x588, x573, x575, &x590);
+{ uint64_t x593; uint8_t x594 = _addcarryx_u64(x591, x576, x578, &x593);
+{ uint64_t x596; uint8_t _ = _addcarryx_u64(0x0, x594, x579, &x596);
+{ uint64_t x599; uint8_t x600 = _addcarryx_u64(0x0, x544, x563, &x599); // add this round's product row to the running accumulator
+{ uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x547, x581, &x602);
+{ uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x550, x584, &x605);
+{ uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x553, x587, &x608);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(x609, x556, x590, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x559, x593, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x561, x596, &x617);
+{ uint64_t _; uint64_t x620 = _mulx_u64(x599, 0x8e38e38e38e38e39L, &_); // reduction factor for round 6
+{ uint64_t x624; uint64_t x623 = _mulx_u64(x620, 0xfffffffffffffff7L, &x624);
+{ uint64_t x627; uint64_t x626 = _mulx_u64(x620, 0xffffffffffffffffL, &x627);
+{ uint64_t x630; uint64_t x629 = _mulx_u64(x620, 0xffffffffffffffffL, &x630);
+{ uint64_t x633; uint64_t x632 = _mulx_u64(x620, 0xffffffffffffffffL, &x633);
+{ uint64_t x636; uint64_t x635 = _mulx_u64(x620, 0xffffffffffffffffL, &x636);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(0x0, x624, x626, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x627, x629, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x630, x632, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x633, x635, &x647);
+{ uint64_t x650; uint8_t x651 = _addcarryx_u64(x648, x636, x620, &x650);
+{ uint64_t _; uint8_t x654 = _addcarryx_u64(0x0, x599, x623, &_);
+{ uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x602, x638, &x656);
+{ uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x605, x641, &x659);
+{ uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x608, x644, &x662);
+{ uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x611, x647, &x665);
+{ uint64_t x668; uint8_t x669 = _addcarryx_u64(x666, x614, x650, &x668);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(x669, x617, x651, &x671);
+{ uint8_t x673 = (x672 + x618); // overflow word above the six result limbs
+{ uint64_t x675; uint8_t x676 = _subborrow_u64(0x0, x656, 0xfffffffffffffff7L, &x675); // trial subtraction of the modulus 2^321 - 9 from x656..x671, lowest limb first
+{ uint64_t x678; uint8_t x679 = _subborrow_u64(x676, x659, 0xffffffffffffffffL, &x678);
+{ uint64_t x681; uint8_t x682 = _subborrow_u64(x679, x662, 0xffffffffffffffffL, &x681);
+{ uint64_t x684; uint8_t x685 = _subborrow_u64(x682, x665, 0xffffffffffffffffL, &x684);
+{ uint64_t x687; uint8_t x688 = _subborrow_u64(x685, x668, 0xffffffffffffffffL, &x687);
+{ uint64_t x690; uint8_t x691 = _subborrow_u64(x688, x671, 0x1, &x690);
+{ uint64_t _; uint8_t x694 = _subborrow_u64(x691, x673, 0x0, &_); // extend the borrow through the overflow word; only the final borrow x694 is kept
+{ uint64_t x695 = cmovznz(x694, x690, x671); // cmovznz comes from liblow.h (not shown here) - presumably picks the second argument when x694 == 0 and the third otherwise, i.e. the unsubtracted value on borrow; TODO confirm
+{ uint64_t x696 = cmovznz(x694, x687, x668);
+{ uint64_t x697 = cmovznz(x694, x684, x665);
+{ uint64_t x698 = cmovznz(x694, x681, x662);
+{ uint64_t x699 = cmovznz(x694, x678, x659);
+{ uint64_t x700 = cmovznz(x694, x675, x656);
+out[0] = x695; // results are written most-significant limb first
+out[1] = x696;
+out[2] = x697;
+out[3] = x698;
+out[4] = x699;
+out[5] = x700; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e321m9/femul.h b/src/Specific/montgomery64_2e321m9/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for the six-limb multiplication of the montgomery64_2e321m9 directory: the first six limb arguments are one operand, the last six the other; the definition's note says the caller provides uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e321m9/fenz.c b/src/Specific/montgomery64_2e321m9/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of the six input limbs to out[0]; the stored word is zero exactly when every input limb is zero.
+{ uint64_t x11 = (x10 | x9); // fold the limbs together one at a time
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14);
+out[0] = x15; // single output word
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e321m9/fenz.h b/src/Specific/montgomery64_2e321m9/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the six-limb nonzero test of the montgomery64_2e321m9 directory; the definition's note says the caller provides uint64_t out[1].
diff --git a/src/Specific/montgomery64_2e321m9/feopp.c b/src/Specific/montgomery64_2e321m9/feopp.c
new file mode 100644
index 000000000..41fcb70ec
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Six-limb negation: computes 0 minus the input, then a masked add-back of 2^321 - 9. Input limbs x9,x10,x8,x6,x4,x2 are listed most-significant first (the borrow chain below starts at x2).
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // lowest limb: 0 - x2 with no borrow-in; x13 is the borrow-out
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15); // each following limb consumes the previous borrow
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // top limb; x28 is the borrow out of the whole subtraction
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // mask for the add-back; cmovznz comes from liblow.h (not shown here) - presumably yields 0 when x28 == 0 and all-ones otherwise, TODO confirm
+{ uint64_t x30 = (x29 & 0xfffffffffffffff7L); // lowest modulus limb, 2^64 - 9, kept only when the mask is set
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // add the masked modulus back, lowest limb first
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL); // middle modulus limbs are all-ones
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint8_t x50 = ((uint8_t)x29 & 0x1); // top modulus limb is 1 (321 = 5*64 + 1), so one byte is enough to hold it
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // the final carry-out is stored in `_` and never read
+out[0] = x52; // results are written most-significant limb first
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // least-significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e321m9/feopp.h b/src/Specific/montgomery64_2e321m9/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the six-limb negation of the montgomery64_2e321m9 directory; the definition's note says the caller provides uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e321m9/fesub.c b/src/Specific/montgomery64_2e321m9/fesub.c
new file mode 100644
index 000000000..1e59f4d31
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb subtraction followed by a masked add-back of 2^321 - 9. Minuend limbs are x12,x13,x11,x9,x7,x5 and subtrahend limbs are x22,x23,x21,x19,x17,x15, each listed most-significant first (the borrow chain below starts at x5/x15).
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // lowest limb: x5 - x15 with no borrow-in; x26 is the borrow-out
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28); // each following limb consumes the previous borrow
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // top limb; x41 is the borrow out of the whole subtraction
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // mask for the add-back; cmovznz comes from liblow.h (not shown here) - presumably yields 0 when x41 == 0 and all-ones otherwise, TODO confirm
+{ uint64_t x43 = (x42 & 0xfffffffffffffff7L); // lowest modulus limb, 2^64 - 9, kept only when the mask is set
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // add the masked modulus back, lowest limb first
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL); // middle modulus limbs are all-ones
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint8_t x63 = ((uint8_t)x42 & 0x1); // top modulus limb is 1 (321 = 5*64 + 1), so one byte is enough to hold it
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // the final carry-out is stored in `_` and never read
+out[0] = x65; // results are written most-significant limb first
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // least-significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e321m9/fesub.h b/src/Specific/montgomery64_2e321m9/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e321m9/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for the six-limb subtraction of the montgomery64_2e321m9 directory: the first six limb arguments are the minuend, the last six the subtrahend; the definition's note says the caller provides uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e322m2e161m1/feadd.c b/src/Specific/montgomery64_2e322m2e161m1/feadd.c
new file mode 100644
index 000000000..659abdb6a
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb addition followed by a trial subtraction of 2^322 - 2^161 - 1. First operand limbs are x12,x13,x11,x9,x7,x5 and second operand limbs are x22,x23,x21,x19,x17,x15, each listed most-significant first (the carry chain below starts at x5/x15).
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // lowest limb: x5 + x15 with no carry-in; x26 is the carry-out
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28); // each following limb consumes the previous carry
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // top limb; x41 is the carry out of the 384-bit sum
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43); // trial subtraction of the modulus, lowest limb first
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xfffffffdffffffffL, &x49); // this limb has bit 33 cleared, which is the -2^161 term (161 = 2*64 + 33)
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3, &x58); // top modulus limb is 3 (322 = 5*64 + 2)
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // extend the borrow through the addition's carry-out; only the final borrow x62 is kept
+{ uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz comes from liblow.h (not shown here) - presumably picks the second argument when x62 == 0 and the third otherwise, i.e. the unsubtracted sum on borrow; TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // results are written most-significant limb first
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // least-significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e322m2e161m1/feadd.h b/src/Specific/montgomery64_2e322m2e161m1/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for the six-limb addition of the montgomery64_2e322m2e161m1 directory: the first six limb arguments are one operand, the last six the other; the definition's note says the caller provides uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e322m2e161m1/femul.c b/src/Specific/montgomery64_2e322m2e161m1/femul.c
new file mode 100644
index 000000000..e156fe17f
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // NOTE(review): this generated body is incomplete -- it stops after the first partial product and ends in text that is not C (see the out[] lines), so the function cannot compile as written.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // x5 times each word of the second operand; per the Intel intrinsic the low half is returned and the high half is stored through the pointer
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // carry chain: high half of each partial product plus low half of the next one
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // folds the last carry into the top word; the carry-out lands in the throwaway `_`
+{ uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62); // x25 times the same constant words that feopp.c/fesub.c in this directory add back after a borrow (the words of 2^322 - 2^161 - 1)
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xfffffffdffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
+out[0] = uint64_t x76; // NOTE(review): not valid C -- a declaration appears on the right-hand side of an assignment
+out[1] = uint8_t x77 = Op Syntax.MulSplit 64 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 x25; // NOTE(review): looks like an unreduced generator term printed verbatim, presumably the multiply of x25 by the top constant word 0x3 -- confirm and regenerate
+out[2] = 0x3;; // NOTE(review): appears to be the trailing operand of the term above rather than a real result word
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/montgomery64_2e322m2e161m1/femul.h b/src/Specific/montgomery64_2e322m2e161m1/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declares femul (defined in femul.c): two operands of six 64-bit words each (x12..x5 and x22..x15), results written through out.
diff --git a/src/Specific/montgomery64_2e322m2e161m1/fenz.c b/src/Specific/montgomery64_2e322m2e161m1/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores the bitwise OR of all six input words in out[0], so out[0] is zero exactly when every input word is zero.
+{ uint64_t x11 = (x10 | x9); // fixed sequence of ORs, no branches
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14);
+out[0] = x15; // the only word written; nonzero iff some input word has a bit set
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e322m2e161m1/fenz.h b/src/Specific/montgomery64_2e322m2e161m1/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declares fenz (defined in fenz.c): takes six 64-bit words and writes a single word to out[0].
diff --git a/src/Specific/montgomery64_2e322m2e161m1/feopp.c b/src/Specific/montgomery64_2e322m2e161m1/feopp.c
new file mode 100644
index 000000000..f5026da1d
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - x word by word, then adds the modulus words back, masked by whether the subtraction borrowed; six result words go to out.
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // borrow chain starts at x2 and ends at x9, so x2 is the lowest word and x9 the highest
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // x28 is the final borrow; cmovznz comes from liblow.h (not shown) -- presumably yields the 2nd argument when the 1st is zero, else the 3rd, making x29 an all-ones mask on borrow; TODO confirm
+{ uint64_t x30 = (x29 & 0xffffffffffffffffL); // mask & constant: either that word of 2^322 - 2^161 - 1 or zero
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // add-back chain, lowest word first
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xfffffffdffffffffL); // the one word that differs from all-ones (bit 33 clear, i.e. the -2^161 term)
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint8_t x50 = ((uint8_t)x29 & 0x3); // top constant word is only 0x3, hence the narrow type
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // carry out of the top word is discarded in `_`
+out[0] = x52; // out[0] receives the highest word ...
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // ... and out[5] the lowest
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e322m2e161m1/feopp.h b/src/Specific/montgomery64_2e322m2e161m1/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declares feopp (defined in feopp.c): one operand of six 64-bit words (x9 highest .. x2 lowest, per the borrow order in the definition); six result words are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e322m2e161m1/fesub.c b/src/Specific/montgomery64_2e322m2e161m1/fesub.c
new file mode 100644
index 000000000..e8818feb1
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtraction: computes (x12..x5) - (x22..x15) word by word, then adds the modulus words back, masked by whether the subtraction borrowed; six result words go to out.
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // borrow chain runs x5/x15 -> x12/x22, so x5 and x15 are the lowest words
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // x41 is the final borrow; cmovznz comes from liblow.h (not shown) -- presumably yields the 2nd argument when the 1st is zero, else the 3rd, making x42 an all-ones mask on borrow; TODO confirm
+{ uint64_t x43 = (x42 & 0xffffffffffffffffL); // mask & constant: either that word of 2^322 - 2^161 - 1 or zero
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // add-back chain, lowest word first
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xfffffffdffffffffL); // the one word that differs from all-ones (bit 33 clear, i.e. the -2^161 term)
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint8_t x63 = ((uint8_t)x42 & 0x3); // top constant word is only 0x3, hence the narrow type
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // carry out of the top word is discarded in `_`
+out[0] = x65; // out[0] receives the highest word ...
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // ... and out[5] the lowest
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e322m2e161m1/fesub.h b/src/Specific/montgomery64_2e322m2e161m1/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e322m2e161m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declares fesub (defined in fesub.c): minuend words x12..x5 and subtrahend words x22..x15, each listed highest word first; six result words are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e336m17/feadd.c b/src/Specific/montgomery64_2e336m17/feadd.c
new file mode 100644
index 000000000..62ecf8d23
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Addition: adds (x12..x5) + (x22..x15) word by word, trial-subtracts the modulus words, and selects one of the two results per word based on the final borrow; six result words go to out.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // carry chain runs x5/x15 -> x12/x22, so x5 and x15 are the lowest words
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 is the carry out of the top word
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffefL, &x43); // trial subtraction of the words of 2^336 - 17, lowest word first
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffff, &x58);
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // runs the borrow through the carry word x41; only the final borrow x62 is kept, the difference goes to the throwaway `_`
+{ uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz comes from liblow.h (not shown) -- presumably picks the subtracted word when x62 is zero and the plain sum word otherwise; TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] receives the highest word ...
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // ... and out[5] the lowest
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e336m17/feadd.h b/src/Specific/montgomery64_2e336m17/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declares feadd (defined in feadd.c): two operands of six 64-bit words each (x12..x5 and x22..x15), each listed highest word first; six result words are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e336m17/femul.c b/src/Specific/montgomery64_2e336m17/femul.c
new file mode 100644
index 000000000..e55a79c42
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Fully unrolled word-by-word Montgomery-style multiplication: one round per word of the first operand (x5, x7, x9, x11, x13, x12 in that order), each round adding word * second operand and then a multiple of the modulus words (2^336 - 17) that cancels the lowest word; ends with one trial subtraction of the modulus. Six result words go to out.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1: x5 times each word of the second operand; per the Intel intrinsic the low half is returned and the high half is stored through the pointer
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // stitch the partial products into seven words x25, x43, x46, x49, x52, x55, x58 (high half of one plus low half of the next)
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // `_` is a throwaway; every line opens a new block, so it can be redeclared with a different type later
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xf0f0f0f0f0f0f0f1L, &_); // x61 = lowest word * 0xf0f0f0f0f0f0f0f1 mod 2^64; that constant times 17 is 1 mod 2^64, i.e. it is -1/(2^336 - 17) mod 2^64
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffefL, &x65); // x61 times each word of the modulus, lowest word first
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffff, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82); // stitch into seven words x64, x82, x85, x88, x91, x94, x97
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // add the two seven-word values; the lowest word sums to zero mod 2^64 by choice of x61, so it is discarded and only its carry is used
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118); // running value after round 1: x103..x118 plus carry x119
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // round 2: x7 times each word of the second operand
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // add this round's product into the running value
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xf0f0f0f0f0f0f0f1L, &_); // same reduction step as in round 1, now driven by x157
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffefL, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffff, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // merges the carries of the two addition chains of this round into the top word of the running value
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // round 3: x9 times each word of the second operand
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xf0f0f0f0f0f0f0f1L, &_);
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffefL, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffff, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // round 4: x11 times each word of the second operand
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xf0f0f0f0f0f0f0f1L, &_);
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffefL, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffff, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // round 5: x13 times each word of the second operand
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xf0f0f0f0f0f0f0f1L, &_);
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffefL, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffff, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // round 6 (last): x12 times each word of the second operand
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xf0f0f0f0f0f0f0f1L, &_);
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffefL, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffff, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // value after the last round: words x692..x707 plus the small top word x709
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffefL, &x711); // trial subtraction of the modulus words, lowest word first
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffff, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // runs the borrow through the top word x709; only the final borrow x730 is kept
+{ uint64_t x731 = cmovznz(x730, x726, x707); // cmovznz comes from liblow.h (not shown) -- presumably picks the subtracted word when x730 is zero and the unsubtracted one otherwise; TODO confirm
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[0] receives the highest word ...
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736; // ... and out[5] the lowest
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e336m17/femul.h b/src/Specific/montgomery64_2e336m17/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declares femul (defined in femul.c): two operands of six 64-bit words each (x12..x5 and x22..x15), each listed highest word first; six result words are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e336m17/fenz.c b/src/Specific/montgomery64_2e336m17/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer obtained through the GCC mode(TI) attribute
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on GCC the subborrow intrinsic names are redirected to the sbb builtins below.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition (fenz.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Bitwise-ORs the six input words together and stores the single result word in out[0]. NOTE(review): name suggests a "non-zero" test -- confirm.
+{ uint64_t x11 = (x10 | x9);
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14); // x15 is 0 exactly when every input word is 0
+out[0] = x15; // only out[0] is written; the body contains no branches
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e336m17/fenz.h b/src/Specific/montgomery64_2e336m17/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition so the #define below cannot clash
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: six 64-bit words passed by value plus `out`, the pointer the result is written through.
diff --git a/src/Specific/montgomery64_2e336m17/feopp.c b/src/Specific/montgomery64_2e336m17/feopp.c
new file mode 100644
index 000000000..a0d10433e
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer obtained through the GCC mode(TI) attribute
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on GCC the subborrow intrinsic names are redirected to the sbb builtins below.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition (feopp.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 - x over six 64-bit limbs (x2 lowest ... x9 highest), then adds back the masked constant 2^336 - 17; writes six limbs to out.
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // borrow chain starts at x2 with no borrow-in
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28 is the final borrow-out of the six-limb subtraction
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // cmovznz is declared in liblow.h; presumably yields all-ones when x28 != 0 and 0 otherwise -- TODO confirm
+{ uint64_t x30 = (x29 & 0xffffffffffffffefL); // lowest limb of the constant (2^64 - 17), kept or zeroed by the mask x29
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // carry chain of the add-back starts here
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0xffff); // top limb of the constant: 336 = 5*64 + 16, so only 16 bits are set
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // the last carry-out is stored in `_` and never read
+out[0] = x52; // out[] receives the highest limb first
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // lowest limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e336m17/feopp.h b/src/Specific/montgomery64_2e336m17/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition so the #define below cannot clash
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: six 64-bit words passed by value plus `out`, the pointer the result words are written through.
diff --git a/src/Specific/montgomery64_2e336m17/fesub.c b/src/Specific/montgomery64_2e336m17/fesub.c
new file mode 100644
index 000000000..c7c9d3a78
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer obtained through the GCC mode(TI) attribute
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on GCC the subborrow intrinsic names are redirected to the sbb builtins below.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition (fesub.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb subtraction (x5..x12) - (x15..x22), lowest limbs x5/x15, followed by a masked add-back of the constant 2^336 - 17; writes six limbs to out.
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // borrow chain starts at the lowest limbs with no borrow-in
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 is the final borrow-out of the six-limb subtraction
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // cmovznz is declared in liblow.h; presumably yields all-ones when x41 != 0 and 0 otherwise -- TODO confirm
+{ uint64_t x43 = (x42 & 0xffffffffffffffefL); // lowest limb of the constant (2^64 - 17), kept or zeroed by the mask x42
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // carry chain of the add-back starts here
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0xffff); // top limb of the constant: 336 = 5*64 + 16, so only 16 bits are set
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // the last carry-out is stored in `_` and never read
+out[0] = x65; // out[] receives the highest limb first
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // lowest limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e336m17/fesub.h b/src/Specific/montgomery64_2e336m17/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m17/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition so the #define below cannot clash
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration only: twelve 64-bit words passed by value plus `out`, the pointer the result words are written through.
diff --git a/src/Specific/montgomery64_2e336m3/feadd.c b/src/Specific/montgomery64_2e336m3/feadd.c
new file mode 100644
index 000000000..96ab32ae5
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer obtained through the GCC mode(TI) attribute
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on GCC the subborrow intrinsic names are redirected to the sbb builtins below.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition (feadd.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb addition (x5..x12) + (x15..x22), lowest limbs x5/x15, followed by a trial subtraction of 2^336 - 3 and a per-limb select; writes six limbs to out.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // carry chain starts at the lowest limbs with no carry-in
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 is the carry out of the top limb
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffffdL, &x43); // trial subtraction of the constant, lowest limb 2^64 - 3
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffff, &x58); // top limb of the constant: 336 = 5*64 + 16, so only 16 bits are set
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // extends the borrow through the carry word x41; only the borrow x62 is kept
+{ uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz is declared in liblow.h; presumably picks the subtracted limb when x62 == 0, else the plain sum -- TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[] receives the highest limb first
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // lowest limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e336m3/feadd.h b/src/Specific/montgomery64_2e336m3/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition so the #define below cannot clash
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration only: twelve 64-bit words passed by value plus `out`, the pointer the result words are written through.
diff --git a/src/Specific/montgomery64_2e336m3/femul.c b/src/Specific/montgomery64_2e336m3/femul.c
new file mode 100644
index 000000000..4184be0e3
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer obtained through the GCC mode(TI) attribute
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on GCC the subborrow intrinsic names are redirected to the sbb builtins below.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition (femul.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-limb multiply of (x5..x12) by (x15..x22) with a word-by-word reduction by 2^336 - 3 interleaved after every row; writes six limbs to out. NOTE(review): directory name suggests Montgomery form -- confirm against the generator.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // row 1: x5 times each limb of the second operand; _mulx_u64 returns the low word and stores the high word
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // add each high word to the next low word to form the seven-word row x25,x43,...,x58
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xaaaaaaaaaaaaaaabL, &_); // x61 = low word of x25 * 0xaaaaaaaaaaaaaaab; note 3 * 0xaaaaaaaaaaaaaaab == 1 (mod 2^64)
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffffdL, &x65); // x61 times the constant limbs 0xff..fd, 0xff..ff (x4), 0xffff, i.e. 2^336 - 3
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffff, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // the lowest sum word goes to `_` and is never read; only its carry x101 continues
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // row 2: x7 times each limb of the second operand
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // add row 2 onto the running accumulator from the previous round
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xaaaaaaaaaaaaaaabL, &_); // reduction multiplier for this round, derived from the lowest accumulator word
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffffdL, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffff, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // merge the two top carries of this round into one extra word
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // row 3: x9 times each limb of the second operand
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xaaaaaaaaaaaaaaabL, &_);
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffffdL, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffff, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // row 4: x11 times each limb of the second operand
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xaaaaaaaaaaaaaaabL, &_);
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffffdL, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffff, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // row 5: x13 times each limb of the second operand
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xaaaaaaaaaaaaaaabL, &_);
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffffdL, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffff, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // row 6 (last): x12 times each limb of the second operand
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xaaaaaaaaaaaaaaabL, &_);
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffffdL, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffff, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // extra top word above the six result limbs
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffffdL, &x711); // trial subtraction of 2^336 - 3 from the six accumulated limbs
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffff, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // extends the borrow through the extra word x709; only the borrow x730 is kept
+{ uint64_t x731 = cmovznz(x730, x726, x707); // cmovznz is declared in liblow.h; presumably picks the subtracted limb when x730 == 0, else the unsubtracted one -- TODO confirm
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[] receives the highest limb first
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e336m3/femul.h b/src/Specific/montgomery64_2e336m3/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition so the #define below cannot clash
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration only: twelve 64-bit words passed by value plus `out`, the pointer the result words are written through.
diff --git a/src/Specific/montgomery64_2e336m3/fenz.c b/src/Specific/montgomery64_2e336m3/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer obtained through the GCC mode(TI) attribute
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on GCC the subborrow intrinsic names are redirected to the sbb builtins below.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition (fenz.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Bitwise-ORs the six input words together and stores the single result word in out[0]. NOTE(review): name suggests a "non-zero" test -- confirm.
+{ uint64_t x11 = (x10 | x9);
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14); // x15 is 0 exactly when every input word is 0
+out[0] = x15; // only out[0] is written; the body contains no branches
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e336m3/fenz.h b/src/Specific/montgomery64_2e336m3/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // drop any earlier definition so the #define below cannot clash
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: six 64-bit words passed by value plus `out`, the pointer the result is written through.
diff --git a/src/Specific/montgomery64_2e336m3/feopp.c b/src/Specific/montgomery64_2e336m3/feopp.c
new file mode 100644
index 000000000..0bfd80774
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer obtained through the GCC mode(TI) attribute
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded
+// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 -- on GCC the subborrow intrinsic names are redirected to the sbb builtins below.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition (feopp.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute requesting that the function always be inlined
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 - x over six 64-bit limbs (x2 lowest ... x9 highest), then adds back the masked constant 2^336 - 3; writes six limbs to out.
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // borrow chain starts at x2 with no borrow-in
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28 is the final borrow-out of the six-limb subtraction
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // cmovznz is declared in liblow.h; presumably yields all-ones when x28 != 0 and 0 otherwise -- TODO confirm
+{ uint64_t x30 = (x29 & 0xfffffffffffffffdL); // lowest limb of the constant (2^64 - 3), kept or zeroed by the mask x29
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // carry chain of the add-back starts here
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0xffff); // top limb of the constant: 336 = 5*64 + 16, so only 16 bits are set
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // the last carry-out is stored in `_` and never read
+out[0] = x52; // out[] receives the highest limb first
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // lowest limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e336m3/feopp.h b/src/Specific/montgomery64_2e336m3/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration for the definition in feopp.c: six 64-bit input limbs (x2 starts the borrow chain there, x9 ends it); six limbs are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e336m3/fesub.c b/src/Specific/montgomery64_2e336m3/fesub.c
new file mode 100644
index 000000000..268ea9437
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtracts two 6-limb values limb by limb (x5 - x15 starts the borrow chain, x12 - x22 ends it), then adds the constant 2^336 - 3 masked by x42. Writes 6 limbs to out[].
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // x5 - x15 with no incoming borrow; x26 is the borrow handed to the next limb
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41: borrow out of the last limb
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably 0 when x41 == 0 and all-ones otherwise -- confirm
+{ uint64_t x43 = (x42 & 0xfffffffffffffffdL); // the six constants ANDed with x42 are the limbs of 2^336 - 3, in chain order
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // add the masked constant to the difference, carry chained limb to limb
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0xffff); // last constant limb is 16 bits wide (5*64 + 16 = 336)
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // the carry out of the last limb lands in _ and is never read
+out[0] = x65; // out[0] receives the last limb of the chain ...
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // ... and out[5] the first
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e336m3/fesub.h b/src/Specific/montgomery64_2e336m3/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e336m3/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration for the definition in fesub.c: x5..x12 are the limbs of the minuend and x15..x22 those of the subtrahend (x5 - x15 starts the borrow chain there); six limbs are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e338m15/feadd.c b/src/Specific/montgomery64_2e338m15/feadd.c
new file mode 100644
index 000000000..634312cc1
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Adds two 6-limb values with a carry chain (x5 + x15 first, x12 + x22 last), trial-subtracts the constant 2^338 - 15, and chooses between the two results per limb via cmovznz. Writes 6 limbs to out[].
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // x5 + x15 with no incoming carry; x26 is the carry handed to the next limb
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41: carry out of the last limb
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffff1L, &x43); // trial subtraction: the six constants are the limbs of 2^338 - 15, in chain order
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3ffff, &x58); // last constant limb is 18 bits wide (5*64 + 18 = 338)
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // push the borrow through the addition's carry-out x41; the difference goes to _ and is never read, x62 is the final borrow
+{ uint64_t x63 = cmovznz(x62, x58, x40); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably returns its 2nd argument when x62 == 0 and its 3rd otherwise -- confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] comes from the last limb of the chains ...
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // ... and out[5] from the first
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e338m15/feadd.h b/src/Specific/montgomery64_2e338m15/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration for the definition in feadd.c: x5..x12 and x15..x22 are the limbs of the two addends (x5 + x15 starts the carry chain there); six limbs are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e338m15/femul.c b/src/Specific/montgomery64_2e338m15/femul.c
new file mode 100644
index 000000000..2d8f35d6c
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Multiply-and-reduce over six 64-bit limbs against the constant 2^338 - 15 (limbs 0xfffffffffffffff1, 4 x 0xffffffffffffffff, 0x3ffff). One round per limb x5, x7, x9, x11, x13, x12 of the first operand; each round multiplies by all limbs of the second operand, accumulates, and cancels the low limb of the running sum. Writes 6 limbs to out[].
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1: x5 times each limb of the second operand; _mulx_u64 returns the low half and stores the high half through its pointer
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // fold the partial products: high half of one product plus low half of the next, carry chained
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // top limb: last high half plus the final carry; the carry-out goes to _ and is never read
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor: low 64 bits of x25 * 0xeeeeeeeeeeeeeeef (15 * 0xeeeeeeeeeeeeeeef == 1 mod 2^64); the high half is discarded
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffff1L, &x65); // x61 times each limb of 2^338 - 15
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x3ffff, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82); // fold these partial products the same way
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // the low limb of the sum is stored in _ and never read (x64 == -x25 mod 2^64 by the choice of x61); only its carry x101 continues
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118); // x119: carry out of round 1, used as the extra top limb in round 2
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // round 2: x7 times each limb of the second operand
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // add the round-2 products onto the running sum left by round 1
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor for round 2, derived from the new low limb x157
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffff1L, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x3ffff, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_); // low limb of the sum discarded again; only the carry x218 continues
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // merge the two carry-outs of this round into the extra top limb for the next round
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // round 3: x9 times each limb of the second operand
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275); // add the round-3 products onto the running sum
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor for round 3
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffff1L, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x3ffff, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_); // low limb of the sum discarded; only the carry x336 continues
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294); // merged carry-outs: extra top limb for round 4
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // round 4: x11 times each limb of the second operand
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393); // add the round-4 products onto the running sum
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor for round 4
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffff1L, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x3ffff, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_); // low limb of the sum discarded; only the carry x454 continues
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412); // merged carry-outs: extra top limb for round 5
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // round 5: x13 times each limb of the second operand
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511); // add the round-5 products onto the running sum
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor for round 5
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffff1L, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x3ffff, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_); // low limb of the sum discarded; only the carry x572 continues
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530); // merged carry-outs: extra top limb for round 6
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // round 6: x12 times each limb of the second operand
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629); // add the round-6 products onto the running sum
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xeeeeeeeeeeeeeeefL, &_); // reduction factor for round 6
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffff1L, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x3ffff, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_); // low limb of the sum discarded; only the carry x690 continues
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // merged carry-outs: extra top limb fed into the final trial subtraction
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffff1L, &x711); // trial subtraction of 2^338 - 15 from the six result limbs, borrow chained
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x3ffff, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // push the borrow through the extra top limb x709; the difference goes to _ and is never read, x730 is the final borrow
+{ uint64_t x731 = cmovznz(x730, x726, x707); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably returns its 2nd argument when x730 == 0 and its 3rd otherwise -- confirm
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[0] comes from the last limb of the chains ...
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736; // ... and out[5] from the first
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e338m15/femul.h b/src/Specific/montgomery64_2e338m15/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration for the definition in femul.c: x5..x12 are the limbs of the first operand (one multiplication round each, x5 first) and x15..x22 those of the second; six limbs are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e338m15/fenz.c b/src/Specific/montgomery64_2e338m15/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ // OR-fold of all six input limbs: out[0] ends up zero exactly when every limb is zero.
+  uint64_t any_bits = (x10 | x9); // limbs are folded in as x9, x10, x8, x6, x4, x2
+  any_bits = (x8 | any_bits);
+  any_bits = (x6 | any_bits);
+  any_bits = (x4 | any_bits);
+  out[0] = (x2 | any_bits);
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e338m15/fenz.h b/src/Specific/montgomery64_2e338m15/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration for the definition in fenz.c: the bitwise OR of the six input limbs is written to out[0].
diff --git a/src/Specific/montgomery64_2e338m15/feopp.c b/src/Specific/montgomery64_2e338m15/feopp.c
new file mode 100644
index 000000000..e088984dc
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negates a 6-limb value: computes 0 - x limb by limb (x2 starts the borrow chain, x9 ends it), then adds the constant 2^338 - 15 masked by x29. Writes 6 limbs to out[].
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // 0 - x2 with no incoming borrow; x13 is the borrow handed to the next limb
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28: borrow out of the last limb
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably 0 when x28 == 0 and all-ones otherwise -- confirm
+{ uint64_t x30 = (x29 & 0xfffffffffffffff1L); // the six constants ANDed with x29 are the limbs of 2^338 - 15, in chain order
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // add the masked constant to the difference, carry chained limb to limb
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0x3ffff); // last constant limb is 18 bits wide (5*64 + 18 = 338)
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // the carry out of the last limb lands in _ and is never read
+out[0] = x52; // out[0] receives the last limb of the chain ...
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // ... and out[5] the first
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e338m15/feopp.h b/src/Specific/montgomery64_2e338m15/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration for the definition in feopp.c: six 64-bit input limbs (x2 starts the borrow chain there, x9 ends it); six limbs are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e338m15/fesub.c b/src/Specific/montgomery64_2e338m15/fesub.c
new file mode 100644
index 000000000..c20f02de3
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtracts two 6-limb values limb by limb (x5 - x15 starts the borrow chain, x12 - x22 ends it), then adds the constant 2^338 - 15 masked by x42. Writes 6 limbs to out[].
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // x5 - x15 with no incoming borrow; x26 is the borrow handed to the next limb
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41: borrow out of the last limb
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // NOTE(review): cmovznz is declared in liblow.h (not shown here); presumably 0 when x41 == 0 and all-ones otherwise -- confirm
+{ uint64_t x43 = (x42 & 0xfffffffffffffff1L); // the six constants ANDed with x42 are the limbs of 2^338 - 15, in chain order
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // add the masked constant to the difference, carry chained limb to limb
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0x3ffff); // last constant limb is 18 bits wide (5*64 + 18 = 338)
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // the carry out of the last limb lands in _ and is never read
+out[0] = x65; // out[0] receives the last limb of the chain ...
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // ... and out[5] the first
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e338m15/fesub.h b/src/Specific/montgomery64_2e338m15/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e338m15/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration for the definition in fesub.c: x5..x12 are the limbs of the minuend and x15..x22 those of the subtrahend (x5 - x15 starts the borrow chain there); six limbs are written to out[0..5].
diff --git a/src/Specific/montgomery64_2e369m25/feadd.c b/src/Specific/montgomery64_2e369m25/feadd.c
new file mode 100644
index 000000000..20f02c39e
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Adds the 6-limb operands (x5,x7,x9,x11,x13,x12) and (x15,x17,x19,x21,x23,x22), listed lowest limb first, then selects between the raw sum and sum - (2^369 - 25); writes 6 limbs to out.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // carry chain starts at the lowest limbs, carry-in 0
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 = carry out of the top limbs
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffe7L, &x43); // borrow chain: subtract the 6-limb constant 2^369 - 25 (lowest limb is 2^64 - 25)
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x1ffffffffffff, &x58); // top limb of the constant is 2^49 - 1
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // run the borrow through the add carry x41; only the final borrow x62 is kept
+{ uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz is not defined in this file (liblow.h is included above); presumably yields arg 2 when x62 == 0, else arg 3 -- TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] holds the top limb, out[5] the lowest
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e369m25/feadd.h b/src/Specific/montgomery64_2e369m25/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery64_2e369m25/femul.c b/src/Specific/montgomery64_2e369m25/femul.c
new file mode 100644
index 000000000..9ce0556eb
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six rounds, one per limb of the first operand (x5,x7,x9,x11,x13,x12): multiply that limb by the second operand (x15,x17,x19,x21,x23,x22), accumulate, then add a multiple of 2^369 - 25 and drop the lowest limb; a final trial subtraction picks the 6 limbs written to out. Structure matches word-by-word Montgomery multiplication -- TODO confirm against the generator.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1 (limb x5): _mulx_u64 returns the low 64 bits, the high 64 bits go through the pointer
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // add each high half to the next low half, giving the 7-limb row x25,x43,x46,x49,x52,x55,x58
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // last carry x56 is passed as an addend; the carry-out is discarded
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8f5c28f5c28f5c29L, &_); // x61 = lowest limb * constant mod 2^64 (high half discarded); 25 * 0x8f5c28f5c28f5c29 == 1 mod 2^64
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffe7L, &x65); // x61 times the six limbs of 2^369 - 25, lowest limb (2^64 - 25) first
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x1ffffffffffff, &x80); // top limb of the constant is 2^49 - 1
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // lowest limb of the sum goes to _ and is dropped; only its carry x101 is used
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // round 2 (limb x7)
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // add this round's product row to the running accumulator
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8f5c28f5c28f5c29L, &_);
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffe7L, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x1ffffffffffff, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // sum of this round's two top carries, carried into the next round as an extra top word
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // round 3 (limb x9)
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8f5c28f5c28f5c29L, &_);
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffe7L, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x1ffffffffffff, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // round 4 (limb x11)
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8f5c28f5c28f5c29L, &_);
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffe7L, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x1ffffffffffff, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // round 5 (limb x13)
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8f5c28f5c28f5c29L, &_);
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffe7L, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x1ffffffffffff, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // round 6 (limb x12, last in the chain)
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8f5c28f5c28f5c29L, &_);
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffe7L, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x1ffffffffffff, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648);
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffe7L, &x711); // trial subtraction of 2^369 - 25 from the 6-limb result x692..x707
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x1ffffffffffff, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // run the borrow through the extra top word x709; only the final borrow x730 is kept
+{ uint64_t x731 = cmovznz(x730, x726, x707); // cmovznz is not defined in this file (liblow.h is included above); presumably yields arg 2 when x730 == 0, else arg 3 -- TODO confirm
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[0] holds the top limb, out[5] the lowest
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e369m25/femul.h b/src/Specific/montgomery64_2e369m25/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery64_2e369m25/fenz.c b/src/Specific/montgomery64_2e369m25/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of all six input words to out[0]; the result is 0 exactly when every input word is 0.
+{ uint64_t x11 = (x10 | x9); // fold the words together one at a time, with no branches
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14);
+out[0] = x15; // only out[0] is written
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e369m25/fenz.h b/src/Specific/montgomery64_2e369m25/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e369m25/feopp.c b/src/Specific/montgomery64_2e369m25/feopp.c
new file mode 100644
index 000000000..c77996a8b
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Computes 0 minus the 6-limb input (x2,x4,x6,x8,x10,x9), listed lowest limb first, then adds 2^369 - 25 ANDed with a mask derived from the final borrow; writes 6 limbs to out.
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // borrow chain: 0 - input, lowest limb first
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28 = final borrow out of the top limb
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // cmovznz is not defined in this file (liblow.h is included above); presumably x29 is 0 when x28 == 0 and all-ones otherwise -- TODO confirm
+{ uint64_t x30 = (x29 & 0xffffffffffffffe7L); // masked lowest limb of 2^369 - 25 (2^64 - 25)
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // carry chain: add the masked constant to the difference
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL); // AND with all-ones leaves the mask unchanged
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0x1ffffffffffff); // masked top limb of the constant (2^49 - 1)
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // the carry out of the top limb is discarded
+out[0] = x52; // out[0] holds the top limb, out[5] the lowest
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e369m25/feopp.h b/src/Specific/montgomery64_2e369m25/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e369m25/fesub.c b/src/Specific/montgomery64_2e369m25/fesub.c
new file mode 100644
index 000000000..392994d17
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtracts the 6-limb operand (x15,x17,x19,x21,x23,x22) from (x5,x7,x9,x11,x13,x12), listed lowest limb first, then adds 2^369 - 25 ANDed with a mask derived from the final borrow; writes 6 limbs to out.
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // borrow chain starts at the lowest limbs, borrow-in 0
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 = final borrow out of the top limbs
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // cmovznz is not defined in this file (liblow.h is included above); presumably x42 is 0 when x41 == 0 and all-ones otherwise -- TODO confirm
+{ uint64_t x43 = (x42 & 0xffffffffffffffe7L); // masked lowest limb of 2^369 - 25 (2^64 - 25)
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // carry chain: add the masked constant to the difference
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL); // AND with all-ones leaves the mask unchanged
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0x1ffffffffffff); // masked top limb of the constant (2^49 - 1)
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // the carry out of the top limb is discarded
+out[0] = x65; // out[0] holds the top limb, out[5] the lowest
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e369m25/fesub.h b/src/Specific/montgomery64_2e369m25/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e369m25/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery64_2e379m19/feadd.c b/src/Specific/montgomery64_2e379m19/feadd.c
new file mode 100644
index 000000000..7e29347b3
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Adds the 6-limb operands (x5,x7,x9,x11,x13,x12) and (x15,x17,x19,x21,x23,x22), listed lowest limb first, then selects between the raw sum and sum - (2^379 - 19); writes 6 limbs to out.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // carry chain starts at the lowest limbs, carry-in 0
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 = carry out of the top limbs
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffedL, &x43); // borrow chain: subtract the 6-limb constant 2^379 - 19 (lowest limb is 2^64 - 19)
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7ffffffffffffff, &x58); // top limb of the constant is 2^59 - 1
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // run the borrow through the add carry x41; only the final borrow x62 is kept
+{ uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz is not defined in this file (liblow.h is included above); presumably yields arg 2 when x62 == 0, else arg 3 -- TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] holds the top limb, out[5] the lowest
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e379m19/feadd.h b/src/Specific/montgomery64_2e379m19/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery64_2e379m19/femul.c b/src/Specific/montgomery64_2e379m19/femul.c
new file mode 100644
index 000000000..d641da6dc
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // word-by-word Montgomery-style multiply of (x12..x5) by (x22..x15), x5/x15 being the least-significant words; six result words go to out[0..5]
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 0: x5 times each word of the second operand; _mulx_u64 returns the low half and stores the high half
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // carry chain joining each high half with the next low half: partial product is x25,x43,x46,x49,x52,x55,x58 (low to high)
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // last carry x56 is folded into the top high half; the carry-out is discarded
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x86bca1af286bca1bL, &_); // reduction factor: low 64 bits of x25 * k, where 19 * k == 1 (mod 2^64)
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffedL, &x65); // x61 times the six words of 2^379 - 19 (low word 2^64 - 19, top word 2^59 - 1)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7ffffffffffffff, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82); // same high/low folding for the x61 multiple
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // add the multiple to the partial product; the lowest sum word is thrown away and only its carry x101 is kept
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118); // accumulator after round 0: x103..x118 plus carry x119
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // round 1: x7 times each word of the second operand
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // add this row to the accumulator left by the previous round
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x86bca1af286bca1bL, &_); // reduction factor for round 1, derived from the new low word x157
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffedL, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7ffffffffffffff, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_); // low word discarded again; only the carry x218 survives
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // the two top carries of this round are summed into the accumulator's extra top word
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // round 2: x9 times each word of the second operand
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275); // add this row to the accumulator
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x86bca1af286bca1bL, &_); // reduction factor for round 2
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffedL, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7ffffffffffffff, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_); // low word discarded; carry x336 kept
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294); // extra top word after round 2
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // round 3: x11 times each word of the second operand
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393); // add this row to the accumulator
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x86bca1af286bca1bL, &_); // reduction factor for round 3
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffedL, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7ffffffffffffff, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_); // low word discarded; carry x454 kept
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412); // extra top word after round 3
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // round 4: x13 times each word of the second operand
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511); // add this row to the accumulator
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x86bca1af286bca1bL, &_); // reduction factor for round 4
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffedL, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7ffffffffffffff, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_); // low word discarded; carry x572 kept
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530); // extra top word after round 4
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // round 5 (last): x12 times each word of the second operand
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629); // add this row to the accumulator
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x86bca1af286bca1bL, &_); // reduction factor for round 5
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffedL, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7ffffffffffffff, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_); // low word discarded; carry x690 kept
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // extra top word of the final accumulator x692..x707
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffedL, &x711); // trial subtraction of 2^379 - 19 from the accumulator, borrow rippling upward
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7ffffffffffffff, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // x730 is the final borrow once the extra top word x709 is included
+{ uint64_t x731 = cmovznz(x730, x726, x707); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x730 == 0 and the 3rd otherwise -- TODO confirm
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[0] receives the most-significant selected word, out[5] the least-significant
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e379m19/femul.h b/src/Specific/montgomery64_2e379m19/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // prototype for the definition in femul.c, which stores six words into out[0..5]; x5 and x15 are the least-significant words of the two operands there
diff --git a/src/Specific/montgomery64_2e379m19/fenz.c b/src/Specific/montgomery64_2e379m19/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // stores the bitwise OR of the six input words into out[0]
+{ uint64_t x11 = (x10 | x9); // OR is accumulated one word at a time: x9, x10, x8, x6, x4, x2
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14);
+out[0] = x15; // zero exactly when every input word is zero
+}}}}} // closes the function body and the four nested declaration scopes
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e379m19/fenz.h b/src/Specific/montgomery64_2e379m19/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for the definition in fenz.c, which writes a single word to out[0]
diff --git a/src/Specific/montgomery64_2e379m19/feopp.c b/src/Specific/montgomery64_2e379m19/feopp.c
new file mode 100644
index 000000000..63e2ab312
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // computes 0 minus the six-word input (x2 is the least-significant word), then conditionally adds 2^379 - 19; result goes to out[0..5]
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // borrow chain for 0 - input, lowest word first
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28 is the final borrow out of the top word
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // mask built from the borrow; cmovznz is from liblow.h (not shown) -- presumably 0 when x28 == 0 and all-ones otherwise, TODO confirm
+{ uint64_t x30 = (x29 & 0xffffffffffffffedL); // masked low word of the modulus (2^64 - 19)
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // carry chain adding the masked modulus words to the difference
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0x7ffffffffffffff); // masked top word of the modulus (2^59 - 1)
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // the carry out of the top word is discarded
+out[0] = x52; // out[0] is the most-significant word, out[5] the least-significant
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e379m19/feopp.h b/src/Specific/montgomery64_2e379m19/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for the definition in feopp.c, which stores six words into out[0..5]; x2 is the least-significant input word there
diff --git a/src/Specific/montgomery64_2e379m19/fesub.c b/src/Specific/montgomery64_2e379m19/fesub.c
new file mode 100644
index 000000000..8f8ef0681
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // subtracts (x22..x15) from (x12..x5) word by word (x5/x15 least significant), then conditionally adds 2^379 - 19; result goes to out[0..5]
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // borrow chain for the six-word difference, lowest word first
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 is the final borrow out of the top word
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // mask built from the borrow; cmovznz is from liblow.h (not shown) -- presumably 0 when x41 == 0 and all-ones otherwise, TODO confirm
+{ uint64_t x43 = (x42 & 0xffffffffffffffedL); // masked low word of the modulus (2^64 - 19)
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // carry chain adding the masked modulus words to the difference
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0x7ffffffffffffff); // masked top word of the modulus (2^59 - 1)
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // the carry out of the top word is discarded
+out[0] = x65; // out[0] is the most-significant word, out[5] the least-significant
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e379m19/fesub.h b/src/Specific/montgomery64_2e379m19/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e379m19/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // prototype for the definition in fesub.c, which stores six words into out[0..5]; x5 and x15 are the least-significant words of the two operands there
diff --git a/src/Specific/montgomery64_2e382m105/feadd.c b/src/Specific/montgomery64_2e382m105/feadd.c
new file mode 100644
index 000000000..d2d1c0656
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // adds (x12..x5) and (x22..x15) word by word (x5/x15 least significant), then selects between the sum and the sum minus 2^382 - 105; result goes to out[0..5]
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // carry chain for the six-word sum, lowest word first
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 is the carry out of the top word
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffff97L, &x43); // trial subtraction of the modulus; low word is 2^64 - 105
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x3fffffffffffffff, &x58); // top word of the modulus is 2^62 - 1
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // x62 is the final borrow once the addition's carry x41 is included
+{ uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x62 == 0 and the 3rd otherwise -- TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] is the most-significant word, out[5] the least-significant
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e382m105/feadd.h b/src/Specific/montgomery64_2e382m105/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // prototype for the definition in feadd.c, which stores six words into out[0..5]; x5 and x15 are the least-significant words of the two operands there
diff --git a/src/Specific/montgomery64_2e382m105/femul.c b/src/Specific/montgomery64_2e382m105/femul.c
new file mode 100644
index 000000000..dd2961da5
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Word-by-word Montgomery-style multiplication of two 6-limb values for the modulus 2^382-105 (per the directory name). x5/x15 are the least-significant limbs, x12/x22 the most-significant; six result limbs go to out.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1: x5 times every limb of the second operand (_mulx_u64 returns the low word, stores the high word through the pointer)
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // merge overlapping high/low words of adjacent partial products with a carry chain
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // fold the last carry into the top word; the carry-out is discarded into _
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8fd8fd8fd8fd8fd9L, &_); // x61 = low word of x25 * constant (high word discarded); the constant appears to be -p^-1 mod 2^64 -- TODO confirm against the generator
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffff97L, &x65); // x61 times each modulus limb (0x...ff97 lowest, 0x3fff...ffff highest)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x3fffffffffffffff, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // add x61*modulus to the accumulator; the lowest sum word is thrown away (only its carry x101 is kept)
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // round 2: x7 times every limb of the second operand
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // add this round's product row to the running accumulator from the previous round
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8fd8fd8fd8fd8fd9L, &_); // reduction factor for round 2, derived from the accumulator's lowest word
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffff97L, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x3fffffffffffffff, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // sum of the two carry-outs of this round; used as the extra top word in the next round
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // round 3: x9 times every limb of the second operand
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8fd8fd8fd8fd8fd9L, &_); // reduction factor for round 3
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffff97L, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x3fffffffffffffff, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294); // combined carry-outs of round 3
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // round 4: x11 times every limb of the second operand
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8fd8fd8fd8fd8fd9L, &_); // reduction factor for round 4
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffff97L, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x3fffffffffffffff, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412); // combined carry-outs of round 4
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // round 5: x13 times every limb of the second operand
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8fd8fd8fd8fd8fd9L, &_); // reduction factor for round 5
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffff97L, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x3fffffffffffffff, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530); // combined carry-outs of round 5
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // round 6: x12 (top limb of the first operand) times every limb of the second operand
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8fd8fd8fd8fd8fd9L, &_); // reduction factor for round 6
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffff97L, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x3fffffffffffffff, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // extra top word of the 6-limb accumulator x692..x707
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffff97L, &x711); // trial subtraction of the modulus from the accumulator, lowest limb first
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x3fffffffffffffff, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // propagate the borrow through the extra top word; x730 is the final borrow (difference word discarded)
+{ uint64_t x731 = cmovznz(x730, x726, x707); // cmovznz comes from liblow.h (not shown); presumably returns the 2nd argument when x730 == 0 and the 3rd otherwise, i.e. keeps the subtracted value only if no borrow occurred -- TODO confirm
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // results are stored most-significant limb first
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e382m105/femul.h b/src/Specific/montgomery64_2e382m105/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for femul (body in femul.c of this directory). Operands are passed limb by limb: x12,x13,x11,x9,x7,x5 and x22,x23,x21,x19,x17,x15, most-significant limb first; six result limbs are written to out.
diff --git a/src/Specific/montgomery64_2e382m105/fenz.c b/src/Specific/montgomery64_2e382m105/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: ORs the six input limbs together without branching; out[0] ends up 0 exactly when every limb is 0.
+{ uint64_t x11 = (x10 | x9);
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14); // x15 = x2 | x4 | x6 | x8 | x10 | x9
+out[0] = x15; // raw OR of the limbs; the value is not normalised to 0/1
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e382m105/fenz.h b/src/Specific/montgomery64_2e382m105/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz (body in fenz.c of this directory): takes one value as six limbs and writes a single word to out[0].
diff --git a/src/Specific/montgomery64_2e382m105/feopp.c b/src/Specific/montgomery64_2e382m105/feopp.c
new file mode 100644
index 000000000..611e0b97e
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation for the modulus 2^382-105 (per the directory name): computes 0 - x limb by limb, then adds the modulus back if that subtraction borrowed. x2 is the least-significant limb, x9 the most-significant.
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // 0 - x with a borrow chain, lowest limb first
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28 = final borrow
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // mask; cmovznz is from liblow.h (not shown), presumably 0 when x28 == 0 and all-ones otherwise -- TODO confirm
+{ uint64_t x30 = (x29 & 0xffffffffffffff97L); // lowest modulus limb, masked
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // add the masked modulus back with a carry chain
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0x3fffffffffffffff); // highest modulus limb, masked
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // final carry-out is discarded into _
+out[0] = x52; // results are stored most-significant limb first
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32; // least-significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e382m105/feopp.h b/src/Specific/montgomery64_2e382m105/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for feopp (body in feopp.c of this directory): takes one value as six limbs (x9 most-significant ... x2 least-significant) and writes six result limbs to out.
diff --git a/src/Specific/montgomery64_2e382m105/fesub.c b/src/Specific/montgomery64_2e382m105/fesub.c
new file mode 100644
index 000000000..2f1cd8bb4
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtraction for the modulus 2^382-105 (per the directory name): computes (x5..x12) - (x15..x22) limb by limb, then adds the modulus back if the subtraction borrowed. x5/x15 are the least-significant limbs.
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // borrow chain, lowest limb first
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 = final borrow
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // mask; cmovznz is from liblow.h (not shown), presumably 0 when x41 == 0 and all-ones otherwise -- TODO confirm
+{ uint64_t x43 = (x42 & 0xffffffffffffff97L); // lowest modulus limb, masked
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // add the masked modulus back with a carry chain
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0x3fffffffffffffff); // highest modulus limb, masked
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // final carry-out is discarded into _
+out[0] = x65; // results are stored most-significant limb first
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45; // least-significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e382m105/fesub.h b/src/Specific/montgomery64_2e382m105/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e382m105/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for fesub (body in fesub.c of this directory). Minuend limbs x12,x13,x11,x9,x7,x5 and subtrahend limbs x22,x23,x21,x19,x17,x15, most-significant limb first; six result limbs are written to out.
diff --git a/src/Specific/montgomery64_2e383m187/feadd.c b/src/Specific/montgomery64_2e383m187/feadd.c
new file mode 100644
index 000000000..fb1dff18e
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Addition for the modulus 2^383-187 (per the directory name): adds the two 6-limb operands, trial-subtracts the modulus, and selects one of the two results. x5/x15 are the least-significant limbs.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // carry chain, lowest limb first
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 = carry out of the top limb
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffff45L, &x43); // trial subtraction of the modulus (0x...ff45 lowest limb, 0x7fff...ffff highest)
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7fffffffffffffffL, &x58);
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // propagate the borrow through the carry word x41; x62 is the final borrow (difference word discarded)
+{ uint64_t x63 = cmovznz(x62, x58, x40); // cmovznz comes from liblow.h (not shown); presumably returns the 2nd argument when x62 == 0 and the 3rd otherwise, i.e. keeps the subtracted value only if no borrow occurred -- TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // results are stored most-significant limb first
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68; // least-significant limb
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m187/feadd.h b/src/Specific/montgomery64_2e383m187/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for feadd (body in feadd.c of this directory). Operands are passed limb by limb: x12,x13,x11,x9,x7,x5 and x22,x23,x21,x19,x17,x15, most-significant limb first; six result limbs are written to out.
diff --git a/src/Specific/montgomery64_2e383m187/femul.c b/src/Specific/montgomery64_2e383m187/femul.c
new file mode 100644
index 000000000..ab1b51c11
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-word by six-word multiply with a reduction round after each row; writes six words to out. Presumably Montgomery multiplication modulo 2^383 - 187 (per the directory name montgomery64_2e383m187) - confirm against the generator.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // Row 0: x5 times {x15, x17, x19, x21, x23, x22}; each call returns the low half and stores the high half through its pointer argument.
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // Add each high half to the next product's low half, propagating the carry.
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // Top word of the row: last high half plus the final carry; the carry-out lands in _ and is never read.
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x8a4472fea18a4473L, &_); // Reduction round 1: x61 = low 64 bits of x25 * 0x8a4472fea18a4473 (high half discarded); presumably the Montgomery factor -p^-1 mod 2^64 - confirm.
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffff45L, &x65); // x61 times the six constant words (the same words that are subtracted near the end of this function).
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82); // Combine those partial products into the words x64, x82, x85, x88, x91, x94, x97.
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // Add them to the running sum; the lowest sum word goes to _ and is never read, only its carry x101 is used.
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // Row 1: x7 times {x15, x17, x19, x21, x23, x22}.
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // Accumulate row 1 into the running sum (x103 .. x118, carry x119).
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x8a4472fea18a4473L, &_); // Reduction round 2, same constants as round 1.
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffff45L, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // Merge the two top carries of this round into one small word that feeds the next accumulation.
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // Row 2: x9 times {x15, x17, x19, x21, x23, x22}.
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x8a4472fea18a4473L, &_); // Reduction round 3.
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffff45L, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // Row 3: x11 times {x15, x17, x19, x21, x23, x22}.
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x8a4472fea18a4473L, &_); // Reduction round 4.
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffff45L, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // Row 4: x13 times {x15, x17, x19, x21, x23, x22}.
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x8a4472fea18a4473L, &_); // Reduction round 5.
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffff45L, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // Row 5: x12 times {x15, x17, x19, x21, x23, x22}.
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x8a4472fea18a4473L, &_); // Reduction round 6 (last).
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffff45L, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648);
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffff45L, &x711); // Trial subtraction of the six constant words from the result words x692 .. x707.
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // Extend the borrow through the top carry x709; only the final borrow x730 is kept.
+{ uint64_t x731 = cmovznz(x730, x726, x707); // cmovznz comes from liblow.h (not shown); presumably yields the 2nd argument when x730 is zero, else the 3rd - confirm.
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // Output order: out[0] takes the word from the end of the carry chain, out[5] the word from its start.
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m187/femul.h b/src/Specific/montgomery64_2e383m187/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for femul; the definition in femul.c writes out[0] through out[5], so out needs room for six uint64_t words. NOTE(review): force_inline expands to always_inline, yet no body is visible through this header - confirm callers in other translation units build as intended.
diff --git a/src/Specific/montgomery64_2e383m187/fenz.c b/src/Specific/montgomery64_2e383m187/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ /* OR-fold the six input words into out[0]; out[0] is zero only when every input word is zero. */
+  uint64_t acc = (x10 | x9);
+  acc = (x8 | acc);
+  acc = (x6 | acc);
+  acc = (x4 | acc);
+  acc = (x2 | acc);
+  out[0] = acc; }
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e383m187/fenz.h b/src/Specific/montgomery64_2e383m187/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the definition in fenz.c writes only out[0] (the bitwise OR of the six inputs). NOTE(review): force_inline expands to always_inline, yet no body is visible through this header - confirm callers in other translation units build as intended.
diff --git a/src/Specific/montgomery64_2e383m187/feopp.c b/src/Specific/montgomery64_2e383m187/feopp.c
new file mode 100644
index 000000000..9979ec8e7
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ /* Word-wise 0 - x with borrow propagation (x words in chain order: x2, x4, x6, x8,
+   * x10, x9), followed by a masked add-back of the constant words
+   * 0xffffffffffffff45, 0xff..ff (x4), 0x7fffffffffffffff. */
+  uint64_t d0, d1, d2, d3, d4, d5; /* raw difference words, chain start first */
+  uint64_t r0, r1, r2, r3, r4, r5; /* words after the add-back */
+  uint8_t borrow = _subborrow_u64(0x0, 0x0, x2, &d0);
+  borrow = _subborrow_u64(borrow, 0x0, x4, &d1);
+  borrow = _subborrow_u64(borrow, 0x0, x6, &d2);
+  borrow = _subborrow_u64(borrow, 0x0, x8, &d3);
+  borrow = _subborrow_u64(borrow, 0x0, x10, &d4);
+  borrow = _subborrow_u64(borrow, 0x0, x9, &d5);
+  /* cmovznz is from liblow.h (not shown); presumably all-ones here when the chain borrowed - confirm */
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);
+  uint8_t carry = _addcarryx_u64(0x0, d0, (mask & 0xffffffffffffff45L), &r0);
+  carry = _addcarryx_u64(carry, d1, (mask & 0xffffffffffffffffL), &r1);
+  carry = _addcarryx_u64(carry, d2, (mask & 0xffffffffffffffffL), &r2);
+  carry = _addcarryx_u64(carry, d3, (mask & 0xffffffffffffffffL), &r3);
+  carry = _addcarryx_u64(carry, d4, (mask & 0xffffffffffffffffL), &r4);
+  (void)_addcarryx_u64(carry, d5, (mask & 0x7fffffffffffffffL), &r5); /* final carry unused */
+  out[0] = r5;
+  out[1] = r4;
+  out[2] = r3;
+  out[3] = r2;
+  out[4] = r1;
+  out[5] = r0;
+}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m187/feopp.h b/src/Specific/montgomery64_2e383m187/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for feopp; the definition in feopp.c writes out[0] through out[5], so out needs room for six uint64_t words. NOTE(review): force_inline expands to always_inline, yet no body is visible through this header - confirm callers in other translation units build as intended.
diff --git a/src/Specific/montgomery64_2e383m187/fesub.c b/src/Specific/montgomery64_2e383m187/fesub.c
new file mode 100644
index 000000000..62ea521aa
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ /* Word-wise x - y with borrow propagation (x words in chain order: x5, x7, x9, x11,
+   * x13, x12; y words: x15, x17, x19, x21, x23, x22), followed by a masked add-back
+   * of the constant words 0xffffffffffffff45, 0xff..ff (x4), 0x7fffffffffffffff. */
+  uint64_t d0, d1, d2, d3, d4, d5; /* raw difference words, chain start first */
+  uint64_t r0, r1, r2, r3, r4, r5; /* words after the add-back */
+  uint8_t borrow = _subborrow_u64(0x0, x5, x15, &d0);
+  borrow = _subborrow_u64(borrow, x7, x17, &d1);
+  borrow = _subborrow_u64(borrow, x9, x19, &d2);
+  borrow = _subborrow_u64(borrow, x11, x21, &d3);
+  borrow = _subborrow_u64(borrow, x13, x23, &d4);
+  borrow = _subborrow_u64(borrow, x12, x22, &d5);
+  /* cmovznz is from liblow.h (not shown); presumably all-ones here when the chain borrowed - confirm */
+  uint64_t mask = (uint64_t)cmovznz(borrow, 0x0, 0xffffffffffffffffL);
+  uint8_t carry = _addcarryx_u64(0x0, d0, (mask & 0xffffffffffffff45L), &r0);
+  carry = _addcarryx_u64(carry, d1, (mask & 0xffffffffffffffffL), &r1);
+  carry = _addcarryx_u64(carry, d2, (mask & 0xffffffffffffffffL), &r2);
+  carry = _addcarryx_u64(carry, d3, (mask & 0xffffffffffffffffL), &r3);
+  carry = _addcarryx_u64(carry, d4, (mask & 0xffffffffffffffffL), &r4);
+  (void)_addcarryx_u64(carry, d5, (mask & 0x7fffffffffffffffL), &r5); /* final carry unused */
+  out[0] = r5;
+  out[1] = r4;
+  out[2] = r3;
+  out[3] = r2;
+  out[4] = r1;
+  out[5] = r0;
+}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m187/fesub.h b/src/Specific/montgomery64_2e383m187/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m187/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for fesub; the definition in fesub.c writes out[0] through out[5], so out needs room for six uint64_t words. NOTE(review): force_inline expands to always_inline, yet no body is visible through this header - confirm callers in other translation units build as intended.
diff --git a/src/Specific/montgomery64_2e383m31/feadd.c b/src/Specific/montgomery64_2e383m31/feadd.c
new file mode 100644
index 000000000..e13035210
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ /* Word-wise x + y with carry propagation (x words in chain order: x5, x7, x9, x11,
+   * x13, x12; y words: x15, x17, x19, x21, x23, x22), then a trial subtraction of the
+   * constant words 0xffffffffffffffe1, 0xff..ff (x4), 0x7fffffffffffffff. */
+  uint64_t s0, s1, s2, s3, s4, s5; /* raw sum words, chain start first */
+  uint64_t t0, t1, t2, t3, t4, t5, spill; /* trial-difference words; spill is never read */
+  uint8_t carry = _addcarryx_u64(0x0, x5, x15, &s0);
+  carry = _addcarryx_u64(carry, x7, x17, &s1);
+  carry = _addcarryx_u64(carry, x9, x19, &s2);
+  carry = _addcarryx_u64(carry, x11, x21, &s3);
+  carry = _addcarryx_u64(carry, x13, x23, &s4);
+  carry = _addcarryx_u64(carry, x12, x22, &s5);
+  uint8_t borrow = _subborrow_u64(0x0, s0, 0xffffffffffffffe1L, &t0);
+  borrow = _subborrow_u64(borrow, s1, 0xffffffffffffffffL, &t1);
+  borrow = _subborrow_u64(borrow, s2, 0xffffffffffffffffL, &t2);
+  borrow = _subborrow_u64(borrow, s3, 0xffffffffffffffffL, &t3);
+  borrow = _subborrow_u64(borrow, s4, 0xffffffffffffffffL, &t4);
+  borrow = _subborrow_u64(borrow, s5, 0x7fffffffffffffffL, &t5);
+  borrow = _subborrow_u64(borrow, carry, 0x0, &spill); /* extend the borrow through the top carry */
+  /* cmovznz is from liblow.h (not shown); presumably yields its 2nd argument when borrow is zero, else the 3rd - confirm */
+  out[0] = cmovznz(borrow, t5, s5);
+  out[1] = cmovznz(borrow, t4, s4);
+  out[2] = cmovznz(borrow, t3, s3);
+  out[3] = cmovznz(borrow, t2, s2);
+  out[4] = cmovznz(borrow, t1, s1);
+  out[5] = cmovznz(borrow, t0, s0);
+}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m31/feadd.h b/src/Specific/montgomery64_2e383m31/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for feadd; the definition in feadd.c writes out[0] through out[5], so out needs room for six uint64_t words. NOTE(review): force_inline expands to always_inline, yet no body is visible through this header - confirm callers in other translation units build as intended.
diff --git a/src/Specific/montgomery64_2e383m31/femul.c b/src/Specific/montgomery64_2e383m31/femul.c
new file mode 100644
index 000000000..cae0fe0bd
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)  // writes 6 words to out; first operand words are x5 (low) .. x12 (high), second operand words are x15 (low) .. x22 (high); six rounds of multiply-accumulate each followed by a reduction step against the 6-word constant 2^383-31
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26);  // round 1: x5 times every word of the second operand; each call returns the low half and stores the high half through the pointer
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43);  // fold each high half into the next product's low half, carrying upward
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58);  // top word = last carry + last high half; the carry-out of this add is discarded
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xef7bdef7bdef7bdfL, &_);  // x61 = low half of x25 * 0xef7bdef7bdef7bdf; that constant times 0xffffffffffffffe1 is -1 mod 2^64, so x61 * (2^383-31) cancels the low word x25
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffffffffffe1L, &x65);  // x61 times each word of 2^383-31, least-significant word first
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_);  // the low word of this sum is dropped (it is zero by the choice of x61); only the carry x101 is kept
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);  // remaining words of (accumulator + x61 * constant); dropping the low word shifts the value down by one word
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122);  // round 2: x7 times the second operand
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157);  // add the new partial product to the running value from round 1; x119 (previous carry) acts as the top word
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xef7bdef7bdef7bdfL, &_);  // reduction step for round 2, same shape as in round 1
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffffffffffe1L, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176);  // sum of the two carry-outs of this round; used as the top word in the next round
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240);  // round 3: x9 times the second operand
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xef7bdef7bdef7bdfL, &_);  // reduction step for round 3
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffffffffffe1L, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358);  // round 4: x11 times the second operand
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xef7bdef7bdef7bdfL, &_);  // reduction step for round 4
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffffffffffe1L, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476);  // round 5: x13 times the second operand
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xef7bdef7bdef7bdfL, &_);  // reduction step for round 5
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffffffffffe1L, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594);  // round 6: x12 (the last word of the first operand) times the second operand
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xef7bdef7bdef7bdfL, &_);  // reduction step for round 6
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffffffffffe1L, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648);  // seventh (top) word of the value after the last round
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffffffffffe1L, &x711);  // trial subtraction of 2^383-31 from the words x692 (low) .. x707 (high)
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_);  // borrow is propagated through the top word x709; only the final borrow x730 is kept
+{ uint64_t x731 = cmovznz(x730, x726, x707);  // per-word choice between subtracted and unsubtracted value, keyed on x730; cmovznz comes from liblow.h (not shown) - presumably yields the 2nd argument when x730 is 0 and the 3rd otherwise, TODO confirm
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731;  // out[0] holds the word from the top of the chain (x726/x707) ...
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;  // ... and out[5] the word from the bottom of the chain (x711/x692)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m31/femul.h b/src/Specific/montgomery64_2e383m31/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);  // prototype only; the body lives in femul.c, whose trailing note says the caller supplies uint64_t out[6]
diff --git a/src/Specific/montgomery64_2e383m31/fenz.c b/src/Specific/montgomery64_2e383m31/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  /* Bitwise-OR all six input words into one accumulator and store it in out[0]. */
+  uint64_t any_bits = x9;
+  any_bits |= x10; any_bits |= x8;
+  any_bits |= x6; any_bits |= x4; any_bits |= x2;
+  out[0] = any_bits; /* zero exactly when every input word is zero */
+}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e383m31/fenz.h b/src/Specific/montgomery64_2e383m31/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // prototype only; the body lives in fenz.c, whose trailing note says the caller supplies uint64_t out[1]
diff --git a/src/Specific/montgomery64_2e383m31/feopp.c b/src/Specific/montgomery64_2e383m31/feopp.c
new file mode 100644
index 000000000..3942002b4
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // writes 6 words to out: (0 - input), with the 6-word constant 2^383-31 conditionally added back; input words are x2 (low) .. x9 (high)
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12);  // borrow chain computing 0 - input, least-significant word first
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL);  // mask derived from the final borrow x28; cmovznz comes from liblow.h (not shown) - presumably all-ones when x28 is nonzero and 0 otherwise, TODO confirm
+{ uint64_t x30 = (x29 & 0xffffffffffffffe1L);  // mask ANDed with each word of 2^383-31, then added to the difference with carry propagation
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0x7fffffffffffffffL);
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52);  // carry-out of the top word is discarded
+out[0] = x52;  // out[0] holds the word from the top of the chain ...
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;  // ... and out[5] the word from the bottom of the chain
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m31/feopp.h b/src/Specific/montgomery64_2e383m31/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // prototype only; the body lives in feopp.c, whose trailing note says the caller supplies uint64_t out[6]
diff --git a/src/Specific/montgomery64_2e383m31/fesub.c b/src/Specific/montgomery64_2e383m31/fesub.c
new file mode 100644
index 000000000..3e356e36c
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)  // writes 6 words to out: (first - second), with the 6-word constant 2^383-31 conditionally added back; first operand words are x5 (low) .. x12 (high), second are x15 (low) .. x22 (high)
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25);  // borrow chain computing first - second, least-significant word first
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL);  // mask derived from the final borrow x41; cmovznz comes from liblow.h (not shown) - presumably all-ones when x41 is nonzero and 0 otherwise, TODO confirm
+{ uint64_t x43 = (x42 & 0xffffffffffffffe1L);  // mask ANDed with each word of 2^383-31, then added to the difference with carry propagation
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0x7fffffffffffffffL);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65);  // carry-out of the top word is discarded
+out[0] = x65;  // out[0] holds the word from the top of the chain ...
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;  // ... and out[5] the word from the bottom of the chain
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m31/fesub.h b/src/Specific/montgomery64_2e383m31/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m31/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);  // prototype only; the body lives in fesub.c, whose trailing note says the caller supplies uint64_t out[6]
diff --git a/src/Specific/montgomery64_2e383m421/feadd.c b/src/Specific/montgomery64_2e383m421/feadd.c
new file mode 100644
index 000000000..6e928a84a
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)  // writes 6 words to out: (first + second), followed by a trial subtraction of the 6-word constant 2^383-421; first operand words are x5 (low) .. x12 (high), second are x15 (low) .. x22 (high)
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25);  // carry chain computing first + second, least-significant word first
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);  // x41 is the carry out of the top word
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffe5bL, &x43);  // trial subtraction of 2^383-421 from the sum
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0x7fffffffffffffffL, &x58);
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_);  // borrow is propagated through the carry x41 as a seventh word; only the final borrow x62 is kept
+{ uint64_t x63 = cmovznz(x62, x58, x40);  // per-word choice between subtracted and unsubtracted sum, keyed on x62; cmovznz comes from liblow.h (not shown) - presumably yields the 2nd argument when x62 is 0 and the 3rd otherwise, TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63;  // out[0] holds the word from the top of the chain ...
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;  // ... and out[5] the word from the bottom of the chain
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m421/feadd.h b/src/Specific/montgomery64_2e383m421/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);  // prototype only; the body lives in feadd.c, whose trailing note says the caller supplies uint64_t out[6]
diff --git a/src/Specific/montgomery64_2e383m421/femul.c b/src/Specific/montgomery64_2e383m421/femul.c
new file mode 100644
index 000000000..0ebaa7ed4
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) /* femul: six-round multiply of two six-word operands (first operand words are consumed in the order x5, x7, x9, x11, x13, x12; second operand is x15, x17, x19, x21, x23, x22). Each round folds in a multiple of the six-word constant 0x7fff...fe5b; the directory name (montgomery64_2e383m421) indicates Montgomery arithmetic modulo 2^383 - 421. Six result words are written to out[0..5]. */
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); /* round 1 (x5): _mulx_u64 returns the low half of the product and stores the high half through the pointer */
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); /* add each high half to the next low half, rippling the carry */
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); /* top word = last carry + last high half; the carry-out is discarded */
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xcebeef94fa86fe2dL, &_); /* x61 = low 64 bits of x25 * 0xcebeef94fa86fe2d; the high half is discarded */
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffe5bL, &x65); /* x61 times the six-word constant, least-significant word first */
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0x7fffffffffffffffL, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); /* x25 + x64: only the carry is kept, the sum word is discarded */
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103); /* add the remaining words of the two partial results with carry */
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118);
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); /* round 2 (x7) */
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); /* add this round's product to the running result from the previous round */
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xcebeef94fa86fe2dL, &_);
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffe5bL, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0x7fffffffffffffffL, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); /* sum of the two carry-outs; used as the top word in the next round */
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); /* round 3 (x9) */
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xcebeef94fa86fe2dL, &_);
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffe5bL, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0x7fffffffffffffffL, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); /* round 4 (x11) */
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xcebeef94fa86fe2dL, &_);
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffe5bL, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0x7fffffffffffffffL, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); /* round 5 (x13) */
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xcebeef94fa86fe2dL, &_);
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffe5bL, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0x7fffffffffffffffL, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); /* round 6 (x12) */
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xcebeef94fa86fe2dL, &_);
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffe5bL, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0x7fffffffffffffffL, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648);
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffe5bL, &x711); /* trial subtraction of the six-word constant from the accumulated result */
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0x7fffffffffffffffL, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); /* x730 = final borrow once the extra top word x709 is taken into account; the difference word is discarded */
+{ uint64_t x731 = cmovznz(x730, x726, x707); /* cmovznz comes from liblow.h (not shown here); presumably it yields the subtracted word when x730 is zero and the unsubtracted word otherwise -- TODO confirm */
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; /* out[0] receives the last word of the chain (from x726/x707), out[5] the first (from x711/x692) */
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m421/femul.h b/src/Specific/montgomery64_2e383m421/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); /* Prototype for femul, defined in femul.c of this directory: takes two operands as six 64-bit words each and writes six words to out (femul.c notes "caller: uint64_t out[6]"). */
diff --git a/src/Specific/montgomery64_2e383m421/fenz.c b/src/Specific/montgomery64_2e383m421/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) /* fenz: bitwise-ORs the six input words together and stores the single result word in out[0]; that word is zero exactly when every input word is zero. */
+{ uint64_t x11 = (x10 | x9); /* fold the words in one at a time: x9, x10, then x8, x6, x4, x2 */
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14);
+out[0] = x15; /* only out[0] is written */
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e383m421/fenz.h b/src/Specific/montgomery64_2e383m421/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); /* Prototype for fenz, defined in fenz.c of this directory: takes six 64-bit words and writes one word to out (fenz.c notes "caller: uint64_t out[1]"). */
diff --git a/src/Specific/montgomery64_2e383m421/feopp.c b/src/Specific/montgomery64_2e383m421/feopp.c
new file mode 100644
index 000000000..3b3069cb7
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) /* feopp: subtracts the six-word input (words taken in the order x2, x4, x6, x8, x10, x9) from zero with a borrow chain, then adds back a masked copy of the six-word constant 0x7fff...fe5b; six result words are written to out[0..5]. */
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); /* 0 - input, word by word; each borrow feeds the next subtraction */
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); /* mask built from the final borrow x28; cmovznz comes from liblow.h (not shown), presumably giving 0 when x28 is zero and all-ones otherwise -- TODO confirm */
+{ uint64_t x30 = (x29 & 0xfffffffffffffe5bL); /* each word of the constant is ANDed with the mask before being added */
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0x7fffffffffffffffL);
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); /* the carry out of the last addition is discarded */
+out[0] = x52; /* out[0] receives the last word of the chain, out[5] the first */
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m421/feopp.h b/src/Specific/montgomery64_2e383m421/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); /* Prototype for feopp, defined in feopp.c of this directory: takes one operand as six 64-bit words and writes six words to out (feopp.c notes "caller: uint64_t out[6]"). */
diff --git a/src/Specific/montgomery64_2e383m421/fesub.c b/src/Specific/montgomery64_2e383m421/fesub.c
new file mode 100644
index 000000000..2fd6b28f2
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) /* fesub: subtracts the second six-word operand (x15, x17, x19, x21, x23, x22) from the first (x5, x7, x9, x11, x13, x12) with a borrow chain, then adds back a masked copy of the six-word constant 0x7fff...fe5b; six result words are written to out[0..5]. */
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); /* word-by-word subtraction; each borrow feeds the next step */
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); /* mask built from the final borrow x41; cmovznz comes from liblow.h (not shown), presumably giving 0 when x41 is zero and all-ones otherwise -- TODO confirm */
+{ uint64_t x43 = (x42 & 0xfffffffffffffe5bL); /* each word of the constant is ANDed with the mask before being added */
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0x7fffffffffffffffL);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); /* the carry out of the last addition is discarded */
+out[0] = x65; /* out[0] receives the last word of the chain, out[5] the first */
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e383m421/fesub.h b/src/Specific/montgomery64_2e383m421/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e383m421/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); /* Prototype for fesub, defined in fesub.c of this directory: takes two operands as six 64-bit words each and writes six words to out (fesub.c notes "caller: uint64_t out[6]"). */
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c
new file mode 100644
index 000000000..83bd085ec
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) /* feadd: adds two six-word operands (x5+x15, x7+x17, x9+x19, x11+x21, x13+x23, x12+x22) with a carry chain, trial-subtracts the six-word constant below, and uses the resulting borrow to choose which set of words goes to out[0..5]. */
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); /* word-by-word addition; each carry feeds the next step */
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffff, &x43); /* trial subtraction of the six-word constant; these words spell 2^384 - 2^128 - 2^96 + 2^32 - 1 (as in the directory name), least-significant word first */
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffff00000000L, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xfffffffffffffffeL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffffffffffffffffL, &x58);
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); /* x62 = final borrow once the addition's carry-out x41 is taken into account; the difference word is discarded */
+{ uint64_t x63 = cmovznz(x62, x58, x40); /* cmovznz comes from liblow.h (not shown here); presumably it yields the subtracted word when x62 is zero and the unsubtracted word otherwise -- TODO confirm */
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; /* out[0] receives the last word of the chain (from x58/x40), out[5] the first (from x43/x25) */
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.h b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); /* Prototype for feadd, defined in feadd.c of this directory: takes two operands as six 64-bit words each and writes six words to out (feadd.c notes "caller: uint64_t out[6]"). */
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c
new file mode 100644
index 000000000..5fd52e99c
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // 6x6-word multiply with a reduction step after every row; operand A = {x5,x7,x9,x11,x13,x12}, operand B = {x15,x17,x19,x21,x23,x22}, six result words go to out[0..5]
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1: x5 times each word of B; _mulx_u64 returns the low half and stores the high half through the pointer
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // chain the six products: high half of one plus low half of the next, carry rippling upward
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // top word: last carry x56 added to the final high half; the carry-out is dropped
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0x100000001, &_); // x61 = low 64 bits of x25 * 0x100000001 (high half dropped); presumably the Montgomery quotient word, going by the montgomery64 directory name
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xffffffff, &x65); // x61 times six constant words; low word first they spell 2^384 - 2^128 - 2^96 + 2^32 - 1, the same constants subtracted at the end
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffff00000000L, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xfffffffffffffffeL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffffffffffffffffL, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82); // chain those six products the same way as the x5 row
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // x25 + x64: only the carry is kept, the sum word itself is discarded
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103); // add the x61 multiple to the running product, word by word
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118); // x119 is the carry out of round 1
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // round 2: x7 times each word of B
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // accumulate the x7 row onto the words left by round 1
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175); // the previous round's carry x119 joins the top word here
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0x100000001, &_); // same constant multiply as x61, now applied to the new bottom word x157
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xffffffff, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffff00000000L, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xfffffffffffffffeL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffffffffffffffffL, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_); // bottom sum word discarded again, carry kept
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // merge the two top carries of round 2 into one carry word
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // round 3: x9 times each word of B
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275); // accumulate the x9 row onto the words left by round 2
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0x100000001, &_); // same constant multiply, on bottom word x275
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xffffffff, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffff00000000L, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xfffffffffffffffeL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffffffffffffffffL, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294); // merged top carry for round 3
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // round 4: x11 times each word of B
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393); // accumulate the x11 row onto the words left by round 3
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0x100000001, &_); // same constant multiply, on bottom word x393
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xffffffff, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffff00000000L, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xfffffffffffffffeL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffffffffffffffffL, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412); // merged top carry for round 4
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // round 5: x13 times each word of B
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511); // accumulate the x13 row onto the words left by round 4
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0x100000001, &_); // same constant multiply, on bottom word x511
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xffffffff, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffff00000000L, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xfffffffffffffffeL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffffffffffffffffL, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530); // merged top carry for round 5
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // round 6: x12 times each word of B
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629); // accumulate the x12 row onto the words left by round 5
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0x100000001, &_); // same constant multiply, on bottom word x629
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xffffffff, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffff00000000L, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xfffffffffffffffeL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffffffffffffffffL, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // top carry word after the final round
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xffffffff, &x711); // trial subtraction of the six constant words from x692..x707, borrow rippling upward
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffff00000000L, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xfffffffffffffffeL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffffffffffffffffL, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // borrow extended through the carry word x709; the difference is discarded and x730 is the final borrow
+{ uint64_t x731 = cmovznz(x730, x726, x707); // per-word select on x730 between subtracted and unsubtracted value; cmovznz comes from liblow.h (not shown) - presumably yields the 3rd argument when x730 is nonzero
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[0] receives the word from the top of the chain (x726/x707); out[5] the bottom (x711/x692)
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.h b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // declaration only: result pointer followed by twelve uint64_t words; NOTE(review): always_inline with no body in this header - confirm how callers are expected to get the definition
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the six input words together and stores the single result word in out[0]
+{ uint64_t x11 = (x10 | x9);
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14); // x15 has a bit set wherever any of the six inputs does
+out[0] = x15; // zero exactly when all six inputs are zero; otherwise some nonzero word, not normalised to 1
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.h b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only: result pointer followed by six uint64_t words; NOTE(review): always_inline with no body in this header - confirm how callers are expected to get the definition
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c
new file mode 100644
index 000000000..2bf91b19f
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // computes 0 minus {x2,x4,x6,x8,x10,x9} word by word, then adds back six mask-gated constant words; result words go to out[0..5]
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // subtract each input word from zero, borrow rippling from x2 up to x9
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28 is the borrow out of the top word
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // mask selected by the final borrow; cmovznz comes from liblow.h (not shown) - presumably all-ones when x28 is nonzero, else zero
+{ uint64_t x30 = (x29 & 0xffffffff); // each constant word is ANDed with the mask, then added with carry to the matching difference word; low word first the constants spell 2^384 - 2^128 - 2^96 + 2^32 - 1
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+{ uint64_t x34 = (x29 & 0xffffffff00000000L);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xfffffffffffffffeL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // the last carry-out is discarded
+out[0] = x52; // out[0] holds the top word of the chain (x52), out[5] the bottom word (x32)
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.h b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // declaration only: result pointer followed by six uint64_t words; NOTE(review): always_inline with no body in this header - confirm how callers are expected to get the definition
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c
new file mode 100644
index 000000000..2896cbb98
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // computes {x5,x7,x9,x11,x13,x12} minus {x15,x17,x19,x21,x23,x22} word by word, then adds back six mask-gated constant words; result words go to out[0..5]
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // six-word subtract, borrow rippling from x5-x15 up to x12-x22
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 is the borrow out of the top word
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // mask selected by the final borrow; cmovznz comes from liblow.h (not shown) - presumably all-ones when x41 is nonzero, else zero
+{ uint64_t x43 = (x42 & 0xffffffff); // each constant word is ANDed with the mask, then added with carry to the matching difference word; low word first the constants spell 2^384 - 2^128 - 2^96 + 2^32 - 1
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+{ uint64_t x47 = (x42 & 0xffffffff00000000L);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xfffffffffffffffeL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // the last carry-out is discarded
+out[0] = x65; // out[0] holds the top word of the chain (x65), out[5] the bottom word (x45)
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.h b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m2e128m2e96p2e32m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // declaration only: result pointer followed by twelve uint64_t words; NOTE(review): always_inline with no body in this header - confirm how callers are expected to get the definition
diff --git a/src/Specific/montgomery64_2e384m317/feadd.c b/src/Specific/montgomery64_2e384m317/feadd.c
new file mode 100644
index 000000000..3ee6d5d23
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // adds {x5,x7,x9,x11,x13,x12} and {x15,x17,x19,x21,x23,x22} word by word, then selects per word between the raw sum and the sum minus six constant words; result words go to out[0..5]
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // six-word add, carry rippling from x5+x15 up to x12+x22
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 is the carry out of the top word
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xfffffffffffffec3L, &x43); // trial subtraction of the constant: 0xfffffffffffffec3 then five all-ones words, i.e. 2^384 - 317 with the low word first
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xffffffffffffffffL, &x58);
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // borrow extended through the carry word x41; the difference is discarded and x62 is the final borrow
+{ uint64_t x63 = cmovznz(x62, x58, x40); // per-word select on x62 between subtracted and raw sum word; cmovznz comes from liblow.h (not shown) - presumably yields the 3rd argument when x62 is nonzero
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] receives the word from the top of the chain (x58/x40); out[5] the bottom (x43/x25)
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m317/feadd.h b/src/Specific/montgomery64_2e384m317/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // declaration only: result pointer followed by twelve uint64_t words; NOTE(review): always_inline with no body in this header - confirm how callers are expected to get the definition
diff --git a/src/Specific/montgomery64_2e384m317/femul.c b/src/Specific/montgomery64_2e384m317/femul.c
new file mode 100644
index 000000000..1516efea0
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/femul.c
@@ -0,0 +1,272 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Six-round interleaved multiply/reduce: each round multiplies one limb of the first operand (x5, x7, x9, x11, x13, x12 in turn) by all six limbs of the second (x15..x22), adds a multiple of the constant limbs 0xfffffffffffffec3, 0xff..ff (x5) chosen from the running low limb, and drops that low limb; a trial subtraction of the same constant and a per-limb cmovznz produce out[0..5]. NOTE(review): this matches the shape of word-by-word Montgomery multiplication modulo 2^384 - 317 - confirm against the generator.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1: x5 times each limb of the second operand; the return value is used as the low word and the pointer target as the high word
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // carry chain: high word of each product plus low word of the next; the row is x25, x43, x46, x49, x52, x55, x58
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // top word: last high half plus the pending carry x56; the carry-out is thrown away in `_`
+{ uint64_t _; uint64_t x61 = _mulx_u64(x25, 0xec9e48ae6f71de15L, &_); // x61 = low 64 bits of (row low limb * constant), high half discarded; presumably the constant is -p^-1 mod 2^64 for p = 2^384 - 317 - TODO confirm
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x61, 0xfffffffffffffec3L, &x65); // x61 times each of the six constant limbs (0xfffffffffffffec3 = 2^64 - 317, then five all-ones limbs)
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x61, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x61, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x61, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x61, 0xffffffffffffffffL, &x77);
+{ uint64_t x80; uint64_t x79 = _mulx_u64(x61, 0xffffffffffffffffL, &x80);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(0x0, x65, x67, &x82); // same high/low folding as above, giving the row x64, x82, x85, x88, x91, x94, x97
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t x95 = _addcarryx_u64(x92, x77, x79, &x94);
+{ uint64_t x97; uint8_t _ = _addcarryx_u64(0x0, x95, x80, &x97);
+{ uint64_t _; uint8_t x101 = _addcarryx_u64(0x0, x25, x64, &_); // add the two rows; the low-limb sum is discarded and only its carry x101 is kept, which shifts the accumulator down by one limb
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x43, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x46, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x49, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x52, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x55, x94, &x115);
+{ uint64_t x118; uint8_t x119 = _addcarryx_u64(x116, x58, x97, &x118); // accumulator after round 1: x103..x118 plus carry x119
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x15, &x122); // round 2: same pattern with first-operand limb x7
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x17, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x19, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x21, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x23, &x134);
+{ uint64_t x137; uint64_t x136 = _mulx_u64(x7, x22, &x137);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(0x0, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t x152 = _addcarryx_u64(x149, x134, x136, &x151);
+{ uint64_t x154; uint8_t _ = _addcarryx_u64(0x0, x152, x137, &x154);
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(0x0, x103, x121, &x157); // add the new product row to the accumulator carried over from round 1
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x118, x151, &x172);
+{ uint64_t x175; uint8_t x176 = _addcarryx_u64(x173, x119, x154, &x175);
+{ uint64_t _; uint64_t x178 = _mulx_u64(x157, 0xec9e48ae6f71de15L, &_); // multiplier for this round, taken from the new low limb x157
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x178, 0xfffffffffffffec3L, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x178, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x178, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x178, 0xffffffffffffffffL, &x191);
+{ uint64_t x194; uint64_t x193 = _mulx_u64(x178, 0xffffffffffffffffL, &x194);
+{ uint64_t x197; uint64_t x196 = _mulx_u64(x178, 0xffffffffffffffffL, &x197);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(0x0, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t x209 = _addcarryx_u64(x206, x191, x193, &x208);
+{ uint64_t x211; uint8_t x212 = _addcarryx_u64(x209, x194, x196, &x211);
+{ uint64_t x214; uint8_t _ = _addcarryx_u64(0x0, x212, x197, &x214);
+{ uint64_t _; uint8_t x218 = _addcarryx_u64(0x0, x157, x181, &_); // low-limb sum discarded again; only the carry survives
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x160, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x163, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x166, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x169, x208, &x229);
+{ uint64_t x232; uint8_t x233 = _addcarryx_u64(x230, x172, x211, &x232);
+{ uint64_t x235; uint8_t x236 = _addcarryx_u64(x233, x175, x214, &x235);
+{ uint8_t x237 = (x236 + x176); // merge the carry-outs of the two addition chains into the top word for the next round
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x15, &x240); // round 3: first-operand limb x9
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x17, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x19, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x21, &x249);
+{ uint64_t x252; uint64_t x251 = _mulx_u64(x9, x23, &x252);
+{ uint64_t x255; uint64_t x254 = _mulx_u64(x9, x22, &x255);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(0x0, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t x267 = _addcarryx_u64(x264, x249, x251, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(x267, x252, x254, &x269);
+{ uint64_t x272; uint8_t _ = _addcarryx_u64(0x0, x270, x255, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(0x0, x220, x239, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x232, x266, &x287);
+{ uint64_t x290; uint8_t x291 = _addcarryx_u64(x288, x235, x269, &x290);
+{ uint64_t x293; uint8_t x294 = _addcarryx_u64(x291, x237, x272, &x293);
+{ uint64_t _; uint64_t x296 = _mulx_u64(x275, 0xec9e48ae6f71de15L, &_);
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x296, 0xfffffffffffffec3L, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x296, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x296, 0xffffffffffffffffL, &x306);
+{ uint64_t x309; uint64_t x308 = _mulx_u64(x296, 0xffffffffffffffffL, &x309);
+{ uint64_t x312; uint64_t x311 = _mulx_u64(x296, 0xffffffffffffffffL, &x312);
+{ uint64_t x315; uint64_t x314 = _mulx_u64(x296, 0xffffffffffffffffL, &x315);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(0x0, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t x324 = _addcarryx_u64(x321, x306, x308, &x323);
+{ uint64_t x326; uint8_t x327 = _addcarryx_u64(x324, x309, x311, &x326);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x312, x314, &x329);
+{ uint64_t x332; uint8_t _ = _addcarryx_u64(0x0, x330, x315, &x332);
+{ uint64_t _; uint8_t x336 = _addcarryx_u64(0x0, x275, x299, &_);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x278, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x281, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x284, x323, &x344);
+{ uint64_t x347; uint8_t x348 = _addcarryx_u64(x345, x287, x326, &x347);
+{ uint64_t x350; uint8_t x351 = _addcarryx_u64(x348, x290, x329, &x350);
+{ uint64_t x353; uint8_t x354 = _addcarryx_u64(x351, x293, x332, &x353);
+{ uint8_t x355 = (x354 + x294);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x15, &x358); // round 4: first-operand limb x11
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x17, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x19, &x364);
+{ uint64_t x367; uint64_t x366 = _mulx_u64(x11, x21, &x367);
+{ uint64_t x370; uint64_t x369 = _mulx_u64(x11, x23, &x370);
+{ uint64_t x373; uint64_t x372 = _mulx_u64(x11, x22, &x373);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(0x0, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t x382 = _addcarryx_u64(x379, x364, x366, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(x382, x367, x369, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x370, x372, &x387);
+{ uint64_t x390; uint8_t _ = _addcarryx_u64(0x0, x388, x373, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(0x0, x338, x357, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x347, x381, &x402);
+{ uint64_t x405; uint8_t x406 = _addcarryx_u64(x403, x350, x384, &x405);
+{ uint64_t x408; uint8_t x409 = _addcarryx_u64(x406, x353, x387, &x408);
+{ uint64_t x411; uint8_t x412 = _addcarryx_u64(x409, x355, x390, &x411);
+{ uint64_t _; uint64_t x414 = _mulx_u64(x393, 0xec9e48ae6f71de15L, &_);
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x414, 0xfffffffffffffec3L, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x414, 0xffffffffffffffffL, &x421);
+{ uint64_t x424; uint64_t x423 = _mulx_u64(x414, 0xffffffffffffffffL, &x424);
+{ uint64_t x427; uint64_t x426 = _mulx_u64(x414, 0xffffffffffffffffL, &x427);
+{ uint64_t x430; uint64_t x429 = _mulx_u64(x414, 0xffffffffffffffffL, &x430);
+{ uint64_t x433; uint64_t x432 = _mulx_u64(x414, 0xffffffffffffffffL, &x433);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(0x0, x418, x420, &x435);
+{ uint64_t x438; uint8_t x439 = _addcarryx_u64(x436, x421, x423, &x438);
+{ uint64_t x441; uint8_t x442 = _addcarryx_u64(x439, x424, x426, &x441);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x427, x429, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x430, x432, &x447);
+{ uint64_t x450; uint8_t _ = _addcarryx_u64(0x0, x448, x433, &x450);
+{ uint64_t _; uint8_t x454 = _addcarryx_u64(0x0, x393, x417, &_);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x396, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x399, x438, &x459);
+{ uint64_t x462; uint8_t x463 = _addcarryx_u64(x460, x402, x441, &x462);
+{ uint64_t x465; uint8_t x466 = _addcarryx_u64(x463, x405, x444, &x465);
+{ uint64_t x468; uint8_t x469 = _addcarryx_u64(x466, x408, x447, &x468);
+{ uint64_t x471; uint8_t x472 = _addcarryx_u64(x469, x411, x450, &x471);
+{ uint8_t x473 = (x472 + x412);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x15, &x476); // round 5: first-operand limb x13
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x17, &x479);
+{ uint64_t x482; uint64_t x481 = _mulx_u64(x13, x19, &x482);
+{ uint64_t x485; uint64_t x484 = _mulx_u64(x13, x21, &x485);
+{ uint64_t x488; uint64_t x487 = _mulx_u64(x13, x23, &x488);
+{ uint64_t x491; uint64_t x490 = _mulx_u64(x13, x22, &x491);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(0x0, x476, x478, &x493);
+{ uint64_t x496; uint8_t x497 = _addcarryx_u64(x494, x479, x481, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(x497, x482, x484, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x485, x487, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x488, x490, &x505);
+{ uint64_t x508; uint8_t _ = _addcarryx_u64(0x0, x506, x491, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(0x0, x456, x475, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x462, x496, &x517);
+{ uint64_t x520; uint8_t x521 = _addcarryx_u64(x518, x465, x499, &x520);
+{ uint64_t x523; uint8_t x524 = _addcarryx_u64(x521, x468, x502, &x523);
+{ uint64_t x526; uint8_t x527 = _addcarryx_u64(x524, x471, x505, &x526);
+{ uint64_t x529; uint8_t x530 = _addcarryx_u64(x527, x473, x508, &x529);
+{ uint64_t _; uint64_t x532 = _mulx_u64(x511, 0xec9e48ae6f71de15L, &_);
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x532, 0xfffffffffffffec3L, &x536);
+{ uint64_t x539; uint64_t x538 = _mulx_u64(x532, 0xffffffffffffffffL, &x539);
+{ uint64_t x542; uint64_t x541 = _mulx_u64(x532, 0xffffffffffffffffL, &x542);
+{ uint64_t x545; uint64_t x544 = _mulx_u64(x532, 0xffffffffffffffffL, &x545);
+{ uint64_t x548; uint64_t x547 = _mulx_u64(x532, 0xffffffffffffffffL, &x548);
+{ uint64_t x551; uint64_t x550 = _mulx_u64(x532, 0xffffffffffffffffL, &x551);
+{ uint64_t x553; uint8_t x554 = _addcarryx_u64(0x0, x536, x538, &x553);
+{ uint64_t x556; uint8_t x557 = _addcarryx_u64(x554, x539, x541, &x556);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x542, x544, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x545, x547, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x548, x550, &x565);
+{ uint64_t x568; uint8_t _ = _addcarryx_u64(0x0, x566, x551, &x568);
+{ uint64_t _; uint8_t x572 = _addcarryx_u64(0x0, x511, x535, &_);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x514, x553, &x574);
+{ uint64_t x577; uint8_t x578 = _addcarryx_u64(x575, x517, x556, &x577);
+{ uint64_t x580; uint8_t x581 = _addcarryx_u64(x578, x520, x559, &x580);
+{ uint64_t x583; uint8_t x584 = _addcarryx_u64(x581, x523, x562, &x583);
+{ uint64_t x586; uint8_t x587 = _addcarryx_u64(x584, x526, x565, &x586);
+{ uint64_t x589; uint8_t x590 = _addcarryx_u64(x587, x529, x568, &x589);
+{ uint8_t x591 = (x590 + x530);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x15, &x594); // round 6 (last): first-operand limb x12
+{ uint64_t x597; uint64_t x596 = _mulx_u64(x12, x17, &x597);
+{ uint64_t x600; uint64_t x599 = _mulx_u64(x12, x19, &x600);
+{ uint64_t x603; uint64_t x602 = _mulx_u64(x12, x21, &x603);
+{ uint64_t x606; uint64_t x605 = _mulx_u64(x12, x23, &x606);
+{ uint64_t x609; uint64_t x608 = _mulx_u64(x12, x22, &x609);
+{ uint64_t x611; uint8_t x612 = _addcarryx_u64(0x0, x594, x596, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(x612, x597, x599, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x600, x602, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x603, x605, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x606, x608, &x623);
+{ uint64_t x626; uint8_t _ = _addcarryx_u64(0x0, x624, x609, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(0x0, x574, x593, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x577, x611, &x632);
+{ uint64_t x635; uint8_t x636 = _addcarryx_u64(x633, x580, x614, &x635);
+{ uint64_t x638; uint8_t x639 = _addcarryx_u64(x636, x583, x617, &x638);
+{ uint64_t x641; uint8_t x642 = _addcarryx_u64(x639, x586, x620, &x641);
+{ uint64_t x644; uint8_t x645 = _addcarryx_u64(x642, x589, x623, &x644);
+{ uint64_t x647; uint8_t x648 = _addcarryx_u64(x645, x591, x626, &x647);
+{ uint64_t _; uint64_t x650 = _mulx_u64(x629, 0xec9e48ae6f71de15L, &_);
+{ uint64_t x654; uint64_t x653 = _mulx_u64(x650, 0xfffffffffffffec3L, &x654);
+{ uint64_t x657; uint64_t x656 = _mulx_u64(x650, 0xffffffffffffffffL, &x657);
+{ uint64_t x660; uint64_t x659 = _mulx_u64(x650, 0xffffffffffffffffL, &x660);
+{ uint64_t x663; uint64_t x662 = _mulx_u64(x650, 0xffffffffffffffffL, &x663);
+{ uint64_t x666; uint64_t x665 = _mulx_u64(x650, 0xffffffffffffffffL, &x666);
+{ uint64_t x669; uint64_t x668 = _mulx_u64(x650, 0xffffffffffffffffL, &x669);
+{ uint64_t x671; uint8_t x672 = _addcarryx_u64(0x0, x654, x656, &x671);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x657, x659, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x660, x662, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x663, x665, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x666, x668, &x683);
+{ uint64_t x686; uint8_t _ = _addcarryx_u64(0x0, x684, x669, &x686);
+{ uint64_t _; uint8_t x690 = _addcarryx_u64(0x0, x629, x653, &_);
+{ uint64_t x692; uint8_t x693 = _addcarryx_u64(x690, x632, x671, &x692);
+{ uint64_t x695; uint8_t x696 = _addcarryx_u64(x693, x635, x674, &x695);
+{ uint64_t x698; uint8_t x699 = _addcarryx_u64(x696, x638, x677, &x698);
+{ uint64_t x701; uint8_t x702 = _addcarryx_u64(x699, x641, x680, &x701);
+{ uint64_t x704; uint8_t x705 = _addcarryx_u64(x702, x644, x683, &x704);
+{ uint64_t x707; uint8_t x708 = _addcarryx_u64(x705, x647, x686, &x707);
+{ uint8_t x709 = (x708 + x648); // overflow word above the six result limbs x692..x707
+{ uint64_t x711; uint8_t x712 = _subborrow_u64(0x0, x692, 0xfffffffffffffec3L, &x711); // trial subtraction of the constant limbs (2^64 - 317, then five all-ones) from x692..x707
+{ uint64_t x714; uint8_t x715 = _subborrow_u64(x712, x695, 0xffffffffffffffffL, &x714);
+{ uint64_t x717; uint8_t x718 = _subborrow_u64(x715, x698, 0xffffffffffffffffL, &x717);
+{ uint64_t x720; uint8_t x721 = _subborrow_u64(x718, x701, 0xffffffffffffffffL, &x720);
+{ uint64_t x723; uint8_t x724 = _subborrow_u64(x721, x704, 0xffffffffffffffffL, &x723);
+{ uint64_t x726; uint8_t x727 = _subborrow_u64(x724, x707, 0xffffffffffffffffL, &x726);
+{ uint64_t _; uint8_t x730 = _subborrow_u64(x727, x709, 0x0, &_); // fold the overflow word x709 into the borrow; only the final borrow x730 is used
+{ uint64_t x731 = cmovznz(x730, x726, x707); // per limb, pick the subtracted (x7xx from the borrow chain) or unsubtracted value keyed on x730; cmovznz is declared in liblow.h - presumably a nonzero flag selects the last argument, TODO confirm
+{ uint64_t x732 = cmovznz(x730, x723, x704);
+{ uint64_t x733 = cmovznz(x730, x720, x701);
+{ uint64_t x734 = cmovznz(x730, x717, x698);
+{ uint64_t x735 = cmovznz(x730, x714, x695);
+{ uint64_t x736 = cmovznz(x730, x711, x692);
+out[0] = x731; // out[0] holds the limb from the END of the carry chain and out[5] the limb from its start
+out[1] = x732;
+out[2] = x733;
+out[3] = x734;
+out[4] = x735;
+out[5] = x736;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m317/femul.h b/src/Specific/montgomery64_2e384m317/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Multiplies the 6-limb value (x5,x7,x9,x11,x13,x12) by (x15,x17,x19,x21,x23,x22) with per-limb reduction rounds (see femul.c); the definition writes six limbs, so the caller passes uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e384m317/fenz.c b/src/Specific/montgomery64_2e384m317/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Nonzero test: stores the bitwise OR of all six input limbs in out[0], so out[0] is 0 exactly when every limb is 0.
+{ uint64_t x11 = (x10 | x9); // OR-accumulate the limbs one at a time
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14); // x15 now covers all six limbs; there is no branch, so every limb is always read
+out[0] = x15; // the result is an arbitrary nonzero word, not a normalized 0/1 flag
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e384m317/fenz.h b/src/Specific/montgomery64_2e384m317/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Writes the bitwise OR of the six limbs to out[0] (0 exactly when all limbs are 0); the caller passes uint64_t out[1].
diff --git a/src/Specific/montgomery64_2e384m317/feopp.c b/src/Specific/montgomery64_2e384m317/feopp.c
new file mode 100644
index 000000000..ccfbae4d7
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x2,x4,x6,x8,x10,x9) with a borrow chain (x2 first, x9 last), then adds a masked copy of the 6-limb constant 2^384 - 317, with the mask derived from the final borrow.
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // 0 - x2 with no incoming borrow; x13 is the borrow passed to the next limb
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27); // x28 = borrow out of the whole 6-limb subtraction
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // mask built from the final borrow; cmovznz is declared in liblow.h - presumably all-ones when x28 != 0 and 0 otherwise, TODO confirm
+{ uint64_t x30 = (x29 & 0xfffffffffffffec3L); // masked low limb of the constant (0xfffffffffffffec3 = 2^64 - 317)
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32);
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL); // the other five constant limbs are all-ones, so x34, x38, x42, x46, x50 each equal the mask x29
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // carry out of the last limb is discarded
+out[0] = x52; // out[0] receives the limb from the END of the carry chain and out[5] the limb from its start
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m317/feopp.h b/src/Specific/montgomery64_2e384m317/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Computes 0 - (x2,x4,x6,x8,x10,x9), limbs listed in borrow-chain order, plus a borrow-masked constant (see feopp.c); the definition writes six limbs, so the caller passes uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e384m317/fesub.c b/src/Specific/montgomery64_2e384m317/fesub.c
new file mode 100644
index 000000000..bc84edeb2
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtraction: computes (x5,x7,x9,x11,x13,x12) - (x15,x17,x19,x21,x23,x22) with a borrow chain (x5/x15 first, x12/x22 last), then adds a masked copy of the 6-limb constant 2^384 - 317, with the mask derived from the final borrow.
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // first limb, no incoming borrow; x26 is the borrow passed to the next limb
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40); // x41 = borrow out of the whole 6-limb subtraction
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // mask built from the final borrow; cmovznz is declared in liblow.h - presumably all-ones when x41 != 0 and 0 otherwise, TODO confirm
+{ uint64_t x43 = (x42 & 0xfffffffffffffec3L); // masked low limb of the constant (0xfffffffffffffec3 = 2^64 - 317)
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45);
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL); // the other five constant limbs are all-ones, so x47, x51, x55, x59, x63 each equal the mask x42
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // carry out of the last limb is discarded
+out[0] = x65; // out[0] receives the limb from the END of the carry chain and out[5] the limb from its start
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m317/fesub.h b/src/Specific/montgomery64_2e384m317/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m317/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Computes (x5,x7,x9,x11,x13,x12) - (x15,x17,x19,x21,x23,x22), limbs listed in borrow-chain order, plus a borrow-masked constant (see fesub.c); the definition writes six limbs, so the caller passes uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/feadd.c b/src/Specific/montgomery64_2e384m5x2e368m1/feadd.c
new file mode 100644
index 000000000..bc9cb1765
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Addition with one trial subtraction: adds (x5,x7,x9,x11,x13,x12) and (x15,x17,x19,x21,x23,x22) with a carry chain, subtracts the 6-limb constant 2^384 - 5*2^368 - 1, and selects per limb between the two results with cmovznz keyed on the final borrow.
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // first limb, no incoming carry; x26 is the carry passed to the next limb
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40); // x41 = carry out of the 6-limb sum
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43); // trial subtraction: the five lower constant limbs are all-ones
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xfffaffffffffffffL, &x58); // top constant limb: 0xfffaffffffffffff = 2^64 - 5*2^48 - 1
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // fold the addition's carry-out x41 into the borrow; the difference word is discarded and only the borrow x62 is used
+{ uint64_t x63 = cmovznz(x62, x58, x40); // per limb, pick the subtracted (second argument) or unsubtracted (third argument) value keyed on x62; cmovznz is declared in liblow.h - presumably a nonzero flag selects the third argument, TODO confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] receives the limb from the END of the carry chain and out[5] the limb from its start
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/feadd.h b/src/Specific/montgomery64_2e384m5x2e368m1/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Adds the 6-limb value (x5,x7,x9,x11,x13,x12) to (x15,x17,x19,x21,x23,x22), limbs listed in carry-chain order, followed by a conditional subtraction (see feadd.c); the definition writes six limbs, so the caller passes uint64_t out[6].
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/femul.c b/src/Specific/montgomery64_2e384m5x2e368m1/femul.c
new file mode 100644
index 000000000..644e34634
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/femul.c
@@ -0,0 +1,266 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // six rounds (x5, x7, x9, x11, x13, x12), each: multiply by the second operand, accumulate, reduce; writes out[0..5]. Presumably Montgomery multiplication, per the directory name - confirm.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // round 1: x5 times each word of the second operand; _mulx_u64 returns the low half and stores the high half via the pointer
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // carry chain: high half of one product plus low half of the next
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // previous carry x56 is passed as an addend (carry-in is 0); carry-out is discarded
+{ uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62); // reduction: x25 times the constant words (the same constants are subtracted at the end of the function)
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x25, 0xfffaffffffffffffL, &x77);
+{ uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x62, x64, &x79);
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x65, x67, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t _ = _addcarryx_u64(0x0, x92, x77, &x94);
+{ uint64_t _; uint8_t x98 = _addcarryx_u64(0x0, x25, x61, &_); // sum is discarded; only the carry x98 feeds the chain below
+{ uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x43, x79, &x100);
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x46, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x49, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x52, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x55, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x58, x94, &x115);
+{ uint64_t x119; uint64_t x118 = _mulx_u64(x7, x15, &x119); // round 2: x7 times the second operand
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x17, &x122);
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x19, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x21, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x23, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x22, &x134);
+{ uint64_t x136; uint8_t x137 = _addcarryx_u64(0x0, x119, x121, &x136);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t _ = _addcarryx_u64(0x0, x149, x134, &x151);
+{ uint64_t x154; uint8_t x155 = _addcarryx_u64(0x0, x100, x118, &x154); // add this round's product row to the running accumulator
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x116, x151, &x172); // x116 is the previous round's top carry, used here as an addend
+{ uint64_t x176; uint64_t x175 = _mulx_u64(x154, 0xffffffffffffffffL, &x176); // reduction on x154
+{ uint64_t x179; uint64_t x178 = _mulx_u64(x154, 0xffffffffffffffffL, &x179);
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x154, 0xffffffffffffffffL, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x154, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x154, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x154, 0xfffaffffffffffffL, &x191);
+{ uint64_t x193; uint8_t x194 = _addcarryx_u64(0x0, x176, x178, &x193);
+{ uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x179, x181, &x196);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t _ = _addcarryx_u64(0x0, x206, x191, &x208);
+{ uint64_t _; uint8_t x212 = _addcarryx_u64(0x0, x154, x175, &_);
+{ uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x157, x193, &x214);
+{ uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x160, x196, &x217);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x163, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x166, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x169, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x172, x208, &x229);
+{ uint8_t x231 = (x230 + x173); // combine the carry-outs of the accumulate and reduce chains
+{ uint64_t x234; uint64_t x233 = _mulx_u64(x9, x15, &x234); // round 3: x9 times the second operand
+{ uint64_t x237; uint64_t x236 = _mulx_u64(x9, x17, &x237);
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x19, &x240);
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x21, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x23, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x22, &x249);
+{ uint64_t x251; uint8_t x252 = _addcarryx_u64(0x0, x234, x236, &x251);
+{ uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t _ = _addcarryx_u64(0x0, x264, x249, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(0x0, x214, x233, &x269);
+{ uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x231, x266, &x287);
+{ uint64_t x291; uint64_t x290 = _mulx_u64(x269, 0xffffffffffffffffL, &x291); // reduction on x269
+{ uint64_t x294; uint64_t x293 = _mulx_u64(x269, 0xffffffffffffffffL, &x294);
+{ uint64_t x297; uint64_t x296 = _mulx_u64(x269, 0xffffffffffffffffL, &x297);
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x269, 0xffffffffffffffffL, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x269, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x269, 0xfffaffffffffffffL, &x306);
+{ uint64_t x308; uint8_t x309 = _addcarryx_u64(0x0, x291, x293, &x308);
+{ uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x294, x296, &x311);
+{ uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x297, x299, &x314);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t _ = _addcarryx_u64(0x0, x321, x306, &x323);
+{ uint64_t _; uint8_t x327 = _addcarryx_u64(0x0, x269, x290, &_);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x272, x308, &x329);
+{ uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x275, x311, &x332);
+{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x278, x314, &x335);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x281, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x284, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x287, x323, &x344);
+{ uint8_t x346 = (x345 + x288);
+{ uint64_t x349; uint64_t x348 = _mulx_u64(x11, x15, &x349); // round 4: x11 times the second operand
+{ uint64_t x352; uint64_t x351 = _mulx_u64(x11, x17, &x352);
+{ uint64_t x355; uint64_t x354 = _mulx_u64(x11, x19, &x355);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x21, &x358);
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x23, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x22, &x364);
+{ uint64_t x366; uint8_t x367 = _addcarryx_u64(0x0, x349, x351, &x366);
+{ uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
+{ uint64_t x372; uint8_t x373 = _addcarryx_u64(x370, x355, x357, &x372);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(x373, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t _ = _addcarryx_u64(0x0, x379, x364, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(0x0, x329, x348, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
+{ uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x338, x372, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x346, x381, &x402);
+{ uint64_t x406; uint64_t x405 = _mulx_u64(x384, 0xffffffffffffffffL, &x406); // reduction on x384
+{ uint64_t x409; uint64_t x408 = _mulx_u64(x384, 0xffffffffffffffffL, &x409);
+{ uint64_t x412; uint64_t x411 = _mulx_u64(x384, 0xffffffffffffffffL, &x412);
+{ uint64_t x415; uint64_t x414 = _mulx_u64(x384, 0xffffffffffffffffL, &x415);
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x384, 0xffffffffffffffffL, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x384, 0xfffaffffffffffffL, &x421);
+{ uint64_t x423; uint8_t x424 = _addcarryx_u64(0x0, x406, x408, &x423);
+{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x409, x411, &x426);
+{ uint64_t x429; uint8_t x430 = _addcarryx_u64(x427, x412, x414, &x429);
+{ uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x415, x417, &x432);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x418, x420, &x435);
+{ uint64_t x438; uint8_t _ = _addcarryx_u64(0x0, x436, x421, &x438);
+{ uint64_t _; uint8_t x442 = _addcarryx_u64(0x0, x384, x405, &_);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x387, x423, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x390, x426, &x447);
+{ uint64_t x450; uint8_t x451 = _addcarryx_u64(x448, x393, x429, &x450);
+{ uint64_t x453; uint8_t x454 = _addcarryx_u64(x451, x396, x432, &x453);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x399, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x402, x438, &x459);
+{ uint8_t x461 = (x460 + x403);
+{ uint64_t x464; uint64_t x463 = _mulx_u64(x13, x15, &x464); // round 5: x13 times the second operand
+{ uint64_t x467; uint64_t x466 = _mulx_u64(x13, x17, &x467);
+{ uint64_t x470; uint64_t x469 = _mulx_u64(x13, x19, &x470);
+{ uint64_t x473; uint64_t x472 = _mulx_u64(x13, x21, &x473);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x23, &x476);
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x22, &x479);
+{ uint64_t x481; uint8_t x482 = _addcarryx_u64(0x0, x464, x466, &x481);
+{ uint64_t x484; uint8_t x485 = _addcarryx_u64(x482, x467, x469, &x484);
+{ uint64_t x487; uint8_t x488 = _addcarryx_u64(x485, x470, x472, &x487);
+{ uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x473, x475, &x490);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x476, x478, &x493);
+{ uint64_t x496; uint8_t _ = _addcarryx_u64(0x0, x494, x479, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(0x0, x444, x463, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x450, x484, &x505);
+{ uint64_t x508; uint8_t x509 = _addcarryx_u64(x506, x453, x487, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(x509, x456, x490, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x461, x496, &x517);
+{ uint64_t x521; uint64_t x520 = _mulx_u64(x499, 0xffffffffffffffffL, &x521); // reduction on x499
+{ uint64_t x524; uint64_t x523 = _mulx_u64(x499, 0xffffffffffffffffL, &x524);
+{ uint64_t x527; uint64_t x526 = _mulx_u64(x499, 0xffffffffffffffffL, &x527);
+{ uint64_t x530; uint64_t x529 = _mulx_u64(x499, 0xffffffffffffffffL, &x530);
+{ uint64_t x533; uint64_t x532 = _mulx_u64(x499, 0xffffffffffffffffL, &x533);
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x499, 0xfffaffffffffffffL, &x536);
+{ uint64_t x538; uint8_t x539 = _addcarryx_u64(0x0, x521, x523, &x538);
+{ uint64_t x541; uint8_t x542 = _addcarryx_u64(x539, x524, x526, &x541);
+{ uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x527, x529, &x544);
+{ uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x530, x532, &x547);
+{ uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x533, x535, &x550);
+{ uint64_t x553; uint8_t _ = _addcarryx_u64(0x0, x551, x536, &x553);
+{ uint64_t _; uint8_t x557 = _addcarryx_u64(0x0, x499, x520, &_);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x502, x538, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x505, x541, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x508, x544, &x565);
+{ uint64_t x568; uint8_t x569 = _addcarryx_u64(x566, x511, x547, &x568);
+{ uint64_t x571; uint8_t x572 = _addcarryx_u64(x569, x514, x550, &x571);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x517, x553, &x574);
+{ uint8_t x576 = (x575 + x518);
+{ uint64_t x579; uint64_t x578 = _mulx_u64(x12, x15, &x579); // round 6: x12 times the second operand
+{ uint64_t x582; uint64_t x581 = _mulx_u64(x12, x17, &x582);
+{ uint64_t x585; uint64_t x584 = _mulx_u64(x12, x19, &x585);
+{ uint64_t x588; uint64_t x587 = _mulx_u64(x12, x21, &x588);
+{ uint64_t x591; uint64_t x590 = _mulx_u64(x12, x23, &x591);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x22, &x594);
+{ uint64_t x596; uint8_t x597 = _addcarryx_u64(0x0, x579, x581, &x596);
+{ uint64_t x599; uint8_t x600 = _addcarryx_u64(x597, x582, x584, &x599);
+{ uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x585, x587, &x602);
+{ uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x588, x590, &x605);
+{ uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x591, x593, &x608);
+{ uint64_t x611; uint8_t _ = _addcarryx_u64(0x0, x609, x594, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(0x0, x559, x578, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x562, x596, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x565, x599, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x568, x602, &x623);
+{ uint64_t x626; uint8_t x627 = _addcarryx_u64(x624, x571, x605, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(x627, x574, x608, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x576, x611, &x632);
+{ uint64_t x636; uint64_t x635 = _mulx_u64(x614, 0xffffffffffffffffL, &x636); // reduction on x614
+{ uint64_t x639; uint64_t x638 = _mulx_u64(x614, 0xffffffffffffffffL, &x639);
+{ uint64_t x642; uint64_t x641 = _mulx_u64(x614, 0xffffffffffffffffL, &x642);
+{ uint64_t x645; uint64_t x644 = _mulx_u64(x614, 0xffffffffffffffffL, &x645);
+{ uint64_t x648; uint64_t x647 = _mulx_u64(x614, 0xffffffffffffffffL, &x648);
+{ uint64_t x651; uint64_t x650 = _mulx_u64(x614, 0xfffaffffffffffffL, &x651);
+{ uint64_t x653; uint8_t x654 = _addcarryx_u64(0x0, x636, x638, &x653);
+{ uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x639, x641, &x656);
+{ uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x642, x644, &x659);
+{ uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x645, x647, &x662);
+{ uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x648, x650, &x665);
+{ uint64_t x668; uint8_t _ = _addcarryx_u64(0x0, x666, x651, &x668);
+{ uint64_t _; uint8_t x672 = _addcarryx_u64(0x0, x614, x635, &_);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x617, x653, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x620, x656, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x623, x659, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x626, x662, &x683);
+{ uint64_t x686; uint8_t x687 = _addcarryx_u64(x684, x629, x665, &x686);
+{ uint64_t x689; uint8_t x690 = _addcarryx_u64(x687, x632, x668, &x689);
+{ uint8_t x691 = (x690 + x633);
+{ uint64_t x693; uint8_t x694 = _subborrow_u64(0x0, x674, 0xffffffffffffffffL, &x693); // trial subtraction of the constant words from the accumulator (x674..x689, then x691)
+{ uint64_t x696; uint8_t x697 = _subborrow_u64(x694, x677, 0xffffffffffffffffL, &x696);
+{ uint64_t x699; uint8_t x700 = _subborrow_u64(x697, x680, 0xffffffffffffffffL, &x699);
+{ uint64_t x702; uint8_t x703 = _subborrow_u64(x700, x683, 0xffffffffffffffffL, &x702);
+{ uint64_t x705; uint8_t x706 = _subborrow_u64(x703, x686, 0xffffffffffffffffL, &x705);
+{ uint64_t x708; uint8_t x709 = _subborrow_u64(x706, x689, 0xfffaffffffffffffL, &x708);
+{ uint64_t _; uint8_t x712 = _subborrow_u64(x709, x691, 0x0, &_); // difference discarded; only the final borrow x712 is kept
+{ uint64_t x713 = cmovznz(x712, x708, x689); // per-word choice between subtracted and unsubtracted value, keyed on x712; cmovznz comes from liblow.h (not shown) - presumably yields the second argument when x712 is zero, confirm
+{ uint64_t x714 = cmovznz(x712, x705, x686);
+{ uint64_t x715 = cmovznz(x712, x702, x683);
+{ uint64_t x716 = cmovznz(x712, x699, x680);
+{ uint64_t x717 = cmovznz(x712, x696, x677);
+{ uint64_t x718 = cmovznz(x712, x693, x674);
+out[0] = x713; // out[0] takes the last word of the chains above, out[5] the first
+out[1] = x714;
+out[2] = x715;
+out[3] = x716;
+out[4] = x717;
+out[5] = x718;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/femul.h b/src/Specific/montgomery64_2e384m5x2e368m1/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // out: six result words; (x12..x5) and (x22..x15) are the two operands, one 64-bit word per parameter
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/fenz.c b/src/Specific/montgomery64_2e384m5x2e368m1/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // writes the bitwise OR of all six input words to out[0]
+{ uint64_t x11 = (x10 | x9); // fold the words together one at a time
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14);
+out[0] = x15; // zero exactly when every input word is zero
+}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/fenz.h b/src/Specific/montgomery64_2e384m5x2e368m1/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // out: one result word, the OR of the six inputs
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/feopp.c b/src/Specific/montgomery64_2e384m5x2e368m1/feopp.c
new file mode 100644
index 000000000..4b1126029
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // subtracts the input from zero word by word, then adds back a masked set of constant words; writes out[0..5]
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // borrow chain 0 - input, starting at x2 with borrow-in 0
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // mask is 0x0 or all-ones, selected by the final borrow x28; cmovznz comes from liblow.h (not shown) - presumably all-ones when x28 is nonzero, confirm
+{ uint64_t x30 = (x29 & 0xffffffffffffffffL); // mask applied to a constant word; AND with all-ones leaves the mask unchanged
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // carry chain adding the masked constants to the differences
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0xfffaffffffffffffL); // the only constant word that is not all-ones
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // final carry-out is discarded
+out[0] = x52; // out[0] takes the last word of the carry chain, out[5] the first
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/feopp.h b/src/Specific/montgomery64_2e384m5x2e368m1/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // out: six result words; (x9..x2) is the single operand, one 64-bit word per parameter
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/fesub.c b/src/Specific/montgomery64_2e384m5x2e368m1/fesub.c
new file mode 100644
index 000000000..18a509846
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // subtracts the second operand from the first word by word, then adds back a masked set of constant words; writes out[0..5]
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // borrow chain first - second, starting at x5 - x15 with borrow-in 0
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // mask is 0x0 or all-ones, selected by the final borrow x41; cmovznz comes from liblow.h (not shown) - presumably all-ones when x41 is nonzero, confirm
+{ uint64_t x43 = (x42 & 0xffffffffffffffffL); // mask applied to a constant word; AND with all-ones leaves the mask unchanged
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // carry chain adding the masked constants to the differences
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0xfffaffffffffffffL); // the only constant word that is not all-ones
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // final carry-out is discarded
+out[0] = x65; // out[0] takes the last word of the carry chain, out[5] the first
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m5x2e368m1/fesub.h b/src/Specific/montgomery64_2e384m5x2e368m1/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m5x2e368m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // out: six result words; (x12..x5) is the minuend and (x22..x15) the subtrahend, one 64-bit word per parameter
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/feadd.c b/src/Specific/montgomery64_2e384m79x2e376m1/feadd.c
new file mode 100644
index 000000000..7344abfcb
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/feadd.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // adds the two operands word by word, trial-subtracts a set of constant words, then selects per word; writes out[0..5]
+{ uint64_t x25; uint8_t x26 = _addcarryx_u64(0x0, x5, x15, &x25); // carry chain first + second, starting at x5 + x15 with carry-in 0
+{ uint64_t x28; uint8_t x29 = _addcarryx_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _addcarryx_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x12, x22, &x40);
+{ uint64_t x43; uint8_t x44 = _subborrow_u64(0x0, x25, 0xffffffffffffffffL, &x43); // trial subtraction of the constant words from the sum
+{ uint64_t x46; uint8_t x47 = _subborrow_u64(x44, x28, 0xffffffffffffffffL, &x46);
+{ uint64_t x49; uint8_t x50 = _subborrow_u64(x47, x31, 0xffffffffffffffffL, &x49);
+{ uint64_t x52; uint8_t x53 = _subborrow_u64(x50, x34, 0xffffffffffffffffL, &x52);
+{ uint64_t x55; uint8_t x56 = _subborrow_u64(x53, x37, 0xffffffffffffffffL, &x55);
+{ uint64_t x58; uint8_t x59 = _subborrow_u64(x56, x40, 0xb0ffffffffffffffL, &x58); // the only constant word that is not all-ones
+{ uint64_t _; uint8_t x62 = _subborrow_u64(x59, x41, 0x0, &_); // the addition's carry-out x41 absorbs the borrow; difference discarded, only borrow x62 is kept
+{ uint64_t x63 = cmovznz(x62, x58, x40); // per-word choice between subtracted and unsubtracted value, keyed on x62; cmovznz comes from liblow.h (not shown) - presumably yields the second argument when x62 is zero, confirm
+{ uint64_t x64 = cmovznz(x62, x55, x37);
+{ uint64_t x65 = cmovznz(x62, x52, x34);
+{ uint64_t x66 = cmovznz(x62, x49, x31);
+{ uint64_t x67 = cmovznz(x62, x46, x28);
+{ uint64_t x68 = cmovznz(x62, x43, x25);
+out[0] = x63; // out[0] takes the last word of the chains above, out[5] the first
+out[1] = x64;
+out[2] = x65;
+out[3] = x66;
+out[4] = x67;
+out[5] = x68;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/feadd.h b/src/Specific/montgomery64_2e384m79x2e376m1/feadd.h
new file mode 100644
index 000000000..c4cf5692e
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // out: six result words; (x12..x5) and (x22..x15) are the two operands, one 64-bit word per parameter
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/femul.c b/src/Specific/montgomery64_2e384m79x2e376m1/femul.c
new file mode 100644
index 000000000..15d39e5aa
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/femul.c
@@ -0,0 +1,266 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Interleaved multiply-and-reduce of the 6-word operands (x5, x7, x9, x11, x13, x12) and (x15, x17, x19, x21, x23, x22), listed in carry-chain order; writes 6 words to out.
+{ uint64_t x26; uint64_t x25 = _mulx_u64(x5, x15, &x26); // Round 1: partial products of x5 with each word of the second operand; the return value is the low word, the out-parameter the high word.
+{ uint64_t x29; uint64_t x28 = _mulx_u64(x5, x17, &x29);
+{ uint64_t x32; uint64_t x31 = _mulx_u64(x5, x19, &x32);
+{ uint64_t x35; uint64_t x34 = _mulx_u64(x5, x21, &x35);
+{ uint64_t x38; uint64_t x37 = _mulx_u64(x5, x23, &x38);
+{ uint64_t x41; uint64_t x40 = _mulx_u64(x5, x22, &x41);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(0x0, x26, x28, &x43); // Fold the partials into the 7-word product x25, x43, x46, x49, x52, x55, x58 (high word of each product plus low word of the next).
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x29, x31, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x32, x34, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x35, x37, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x38, x40, &x55);
+{ uint64_t x58; uint8_t _ = _addcarryx_u64(0x0, x56, x41, &x58); // Top word is carry x56 plus x41; the returned carry is discarded as "_".
+{ uint64_t x62; uint64_t x61 = _mulx_u64(x25, 0xffffffffffffffffL, &x62); // Reduction step: multiply the low word x25 by the six constant limbs (five of 0xff..ff, then 0xb0ff..ff).
+{ uint64_t x65; uint64_t x64 = _mulx_u64(x25, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint64_t x67 = _mulx_u64(x25, 0xffffffffffffffffL, &x68);
+{ uint64_t x71; uint64_t x70 = _mulx_u64(x25, 0xffffffffffffffffL, &x71);
+{ uint64_t x74; uint64_t x73 = _mulx_u64(x25, 0xffffffffffffffffL, &x74);
+{ uint64_t x77; uint64_t x76 = _mulx_u64(x25, 0xb0ffffffffffffffL, &x77);
+{ uint64_t x79; uint8_t x80 = _addcarryx_u64(0x0, x62, x64, &x79); // Fold into the 7-word value x61, x79, x82, x85, x88, x91, x94.
+{ uint64_t x82; uint8_t x83 = _addcarryx_u64(x80, x65, x67, &x82);
+{ uint64_t x85; uint8_t x86 = _addcarryx_u64(x83, x68, x70, &x85);
+{ uint64_t x88; uint8_t x89 = _addcarryx_u64(x86, x71, x73, &x88);
+{ uint64_t x91; uint8_t x92 = _addcarryx_u64(x89, x74, x76, &x91);
+{ uint64_t x94; uint8_t _ = _addcarryx_u64(0x0, x92, x77, &x94);
+{ uint64_t _; uint8_t x98 = _addcarryx_u64(0x0, x25, x61, &_); // Add both 7-word values; the low sum word is discarded (x61 == -x25 mod 2^64, so it is zero) and only the carry x98 is kept.
+{ uint64_t x100; uint8_t x101 = _addcarryx_u64(x98, x43, x79, &x100);
+{ uint64_t x103; uint8_t x104 = _addcarryx_u64(x101, x46, x82, &x103);
+{ uint64_t x106; uint8_t x107 = _addcarryx_u64(x104, x49, x85, &x106);
+{ uint64_t x109; uint8_t x110 = _addcarryx_u64(x107, x52, x88, &x109);
+{ uint64_t x112; uint8_t x113 = _addcarryx_u64(x110, x55, x91, &x112);
+{ uint64_t x115; uint8_t x116 = _addcarryx_u64(x113, x58, x94, &x115); // Accumulator after round 1: x100, x103, x106, x109, x112, x115 with top carry x116.
+{ uint64_t x119; uint64_t x118 = _mulx_u64(x7, x15, &x119); // Round 2: same steps with multiplier word x7.
+{ uint64_t x122; uint64_t x121 = _mulx_u64(x7, x17, &x122);
+{ uint64_t x125; uint64_t x124 = _mulx_u64(x7, x19, &x125);
+{ uint64_t x128; uint64_t x127 = _mulx_u64(x7, x21, &x128);
+{ uint64_t x131; uint64_t x130 = _mulx_u64(x7, x23, &x131);
+{ uint64_t x134; uint64_t x133 = _mulx_u64(x7, x22, &x134);
+{ uint64_t x136; uint8_t x137 = _addcarryx_u64(0x0, x119, x121, &x136);
+{ uint64_t x139; uint8_t x140 = _addcarryx_u64(x137, x122, x124, &x139);
+{ uint64_t x142; uint8_t x143 = _addcarryx_u64(x140, x125, x127, &x142);
+{ uint64_t x145; uint8_t x146 = _addcarryx_u64(x143, x128, x130, &x145);
+{ uint64_t x148; uint8_t x149 = _addcarryx_u64(x146, x131, x133, &x148);
+{ uint64_t x151; uint8_t _ = _addcarryx_u64(0x0, x149, x134, &x151);
+{ uint64_t x154; uint8_t x155 = _addcarryx_u64(0x0, x100, x118, &x154); // Add the new 7-word product to the accumulator.
+{ uint64_t x157; uint8_t x158 = _addcarryx_u64(x155, x103, x136, &x157);
+{ uint64_t x160; uint8_t x161 = _addcarryx_u64(x158, x106, x139, &x160);
+{ uint64_t x163; uint8_t x164 = _addcarryx_u64(x161, x109, x142, &x163);
+{ uint64_t x166; uint8_t x167 = _addcarryx_u64(x164, x112, x145, &x166);
+{ uint64_t x169; uint8_t x170 = _addcarryx_u64(x167, x115, x148, &x169);
+{ uint64_t x172; uint8_t x173 = _addcarryx_u64(x170, x116, x151, &x172);
+{ uint64_t x176; uint64_t x175 = _mulx_u64(x154, 0xffffffffffffffffL, &x176); // Reduction step on the new low word x154.
+{ uint64_t x179; uint64_t x178 = _mulx_u64(x154, 0xffffffffffffffffL, &x179);
+{ uint64_t x182; uint64_t x181 = _mulx_u64(x154, 0xffffffffffffffffL, &x182);
+{ uint64_t x185; uint64_t x184 = _mulx_u64(x154, 0xffffffffffffffffL, &x185);
+{ uint64_t x188; uint64_t x187 = _mulx_u64(x154, 0xffffffffffffffffL, &x188);
+{ uint64_t x191; uint64_t x190 = _mulx_u64(x154, 0xb0ffffffffffffffL, &x191);
+{ uint64_t x193; uint8_t x194 = _addcarryx_u64(0x0, x176, x178, &x193);
+{ uint64_t x196; uint8_t x197 = _addcarryx_u64(x194, x179, x181, &x196);
+{ uint64_t x199; uint8_t x200 = _addcarryx_u64(x197, x182, x184, &x199);
+{ uint64_t x202; uint8_t x203 = _addcarryx_u64(x200, x185, x187, &x202);
+{ uint64_t x205; uint8_t x206 = _addcarryx_u64(x203, x188, x190, &x205);
+{ uint64_t x208; uint8_t _ = _addcarryx_u64(0x0, x206, x191, &x208);
+{ uint64_t _; uint8_t x212 = _addcarryx_u64(0x0, x154, x175, &_); // Low sum word discarded again; only its carry x212 propagates.
+{ uint64_t x214; uint8_t x215 = _addcarryx_u64(x212, x157, x193, &x214);
+{ uint64_t x217; uint8_t x218 = _addcarryx_u64(x215, x160, x196, &x217);
+{ uint64_t x220; uint8_t x221 = _addcarryx_u64(x218, x163, x199, &x220);
+{ uint64_t x223; uint8_t x224 = _addcarryx_u64(x221, x166, x202, &x223);
+{ uint64_t x226; uint8_t x227 = _addcarryx_u64(x224, x169, x205, &x226);
+{ uint64_t x229; uint8_t x230 = _addcarryx_u64(x227, x172, x208, &x229);
+{ uint8_t x231 = (x230 + x173); // Merge the top carries of this round's two 7-word additions.
+{ uint64_t x234; uint64_t x233 = _mulx_u64(x9, x15, &x234); // Round 3: multiplier word x9.
+{ uint64_t x237; uint64_t x236 = _mulx_u64(x9, x17, &x237);
+{ uint64_t x240; uint64_t x239 = _mulx_u64(x9, x19, &x240);
+{ uint64_t x243; uint64_t x242 = _mulx_u64(x9, x21, &x243);
+{ uint64_t x246; uint64_t x245 = _mulx_u64(x9, x23, &x246);
+{ uint64_t x249; uint64_t x248 = _mulx_u64(x9, x22, &x249);
+{ uint64_t x251; uint8_t x252 = _addcarryx_u64(0x0, x234, x236, &x251);
+{ uint64_t x254; uint8_t x255 = _addcarryx_u64(x252, x237, x239, &x254);
+{ uint64_t x257; uint8_t x258 = _addcarryx_u64(x255, x240, x242, &x257);
+{ uint64_t x260; uint8_t x261 = _addcarryx_u64(x258, x243, x245, &x260);
+{ uint64_t x263; uint8_t x264 = _addcarryx_u64(x261, x246, x248, &x263);
+{ uint64_t x266; uint8_t _ = _addcarryx_u64(0x0, x264, x249, &x266);
+{ uint64_t x269; uint8_t x270 = _addcarryx_u64(0x0, x214, x233, &x269);
+{ uint64_t x272; uint8_t x273 = _addcarryx_u64(x270, x217, x251, &x272);
+{ uint64_t x275; uint8_t x276 = _addcarryx_u64(x273, x220, x254, &x275);
+{ uint64_t x278; uint8_t x279 = _addcarryx_u64(x276, x223, x257, &x278);
+{ uint64_t x281; uint8_t x282 = _addcarryx_u64(x279, x226, x260, &x281);
+{ uint64_t x284; uint8_t x285 = _addcarryx_u64(x282, x229, x263, &x284);
+{ uint64_t x287; uint8_t x288 = _addcarryx_u64(x285, x231, x266, &x287);
+{ uint64_t x291; uint64_t x290 = _mulx_u64(x269, 0xffffffffffffffffL, &x291); // Reduction step on the new low word x269.
+{ uint64_t x294; uint64_t x293 = _mulx_u64(x269, 0xffffffffffffffffL, &x294);
+{ uint64_t x297; uint64_t x296 = _mulx_u64(x269, 0xffffffffffffffffL, &x297);
+{ uint64_t x300; uint64_t x299 = _mulx_u64(x269, 0xffffffffffffffffL, &x300);
+{ uint64_t x303; uint64_t x302 = _mulx_u64(x269, 0xffffffffffffffffL, &x303);
+{ uint64_t x306; uint64_t x305 = _mulx_u64(x269, 0xb0ffffffffffffffL, &x306);
+{ uint64_t x308; uint8_t x309 = _addcarryx_u64(0x0, x291, x293, &x308);
+{ uint64_t x311; uint8_t x312 = _addcarryx_u64(x309, x294, x296, &x311);
+{ uint64_t x314; uint8_t x315 = _addcarryx_u64(x312, x297, x299, &x314);
+{ uint64_t x317; uint8_t x318 = _addcarryx_u64(x315, x300, x302, &x317);
+{ uint64_t x320; uint8_t x321 = _addcarryx_u64(x318, x303, x305, &x320);
+{ uint64_t x323; uint8_t _ = _addcarryx_u64(0x0, x321, x306, &x323);
+{ uint64_t _; uint8_t x327 = _addcarryx_u64(0x0, x269, x290, &_);
+{ uint64_t x329; uint8_t x330 = _addcarryx_u64(x327, x272, x308, &x329);
+{ uint64_t x332; uint8_t x333 = _addcarryx_u64(x330, x275, x311, &x332);
+{ uint64_t x335; uint8_t x336 = _addcarryx_u64(x333, x278, x314, &x335);
+{ uint64_t x338; uint8_t x339 = _addcarryx_u64(x336, x281, x317, &x338);
+{ uint64_t x341; uint8_t x342 = _addcarryx_u64(x339, x284, x320, &x341);
+{ uint64_t x344; uint8_t x345 = _addcarryx_u64(x342, x287, x323, &x344);
+{ uint8_t x346 = (x345 + x288); // Merge the top carries of this round's two 7-word additions.
+{ uint64_t x349; uint64_t x348 = _mulx_u64(x11, x15, &x349); // Round 4: multiplier word x11.
+{ uint64_t x352; uint64_t x351 = _mulx_u64(x11, x17, &x352);
+{ uint64_t x355; uint64_t x354 = _mulx_u64(x11, x19, &x355);
+{ uint64_t x358; uint64_t x357 = _mulx_u64(x11, x21, &x358);
+{ uint64_t x361; uint64_t x360 = _mulx_u64(x11, x23, &x361);
+{ uint64_t x364; uint64_t x363 = _mulx_u64(x11, x22, &x364);
+{ uint64_t x366; uint8_t x367 = _addcarryx_u64(0x0, x349, x351, &x366);
+{ uint64_t x369; uint8_t x370 = _addcarryx_u64(x367, x352, x354, &x369);
+{ uint64_t x372; uint8_t x373 = _addcarryx_u64(x370, x355, x357, &x372);
+{ uint64_t x375; uint8_t x376 = _addcarryx_u64(x373, x358, x360, &x375);
+{ uint64_t x378; uint8_t x379 = _addcarryx_u64(x376, x361, x363, &x378);
+{ uint64_t x381; uint8_t _ = _addcarryx_u64(0x0, x379, x364, &x381);
+{ uint64_t x384; uint8_t x385 = _addcarryx_u64(0x0, x329, x348, &x384);
+{ uint64_t x387; uint8_t x388 = _addcarryx_u64(x385, x332, x366, &x387);
+{ uint64_t x390; uint8_t x391 = _addcarryx_u64(x388, x335, x369, &x390);
+{ uint64_t x393; uint8_t x394 = _addcarryx_u64(x391, x338, x372, &x393);
+{ uint64_t x396; uint8_t x397 = _addcarryx_u64(x394, x341, x375, &x396);
+{ uint64_t x399; uint8_t x400 = _addcarryx_u64(x397, x344, x378, &x399);
+{ uint64_t x402; uint8_t x403 = _addcarryx_u64(x400, x346, x381, &x402);
+{ uint64_t x406; uint64_t x405 = _mulx_u64(x384, 0xffffffffffffffffL, &x406); // Reduction step on the new low word x384.
+{ uint64_t x409; uint64_t x408 = _mulx_u64(x384, 0xffffffffffffffffL, &x409);
+{ uint64_t x412; uint64_t x411 = _mulx_u64(x384, 0xffffffffffffffffL, &x412);
+{ uint64_t x415; uint64_t x414 = _mulx_u64(x384, 0xffffffffffffffffL, &x415);
+{ uint64_t x418; uint64_t x417 = _mulx_u64(x384, 0xffffffffffffffffL, &x418);
+{ uint64_t x421; uint64_t x420 = _mulx_u64(x384, 0xb0ffffffffffffffL, &x421);
+{ uint64_t x423; uint8_t x424 = _addcarryx_u64(0x0, x406, x408, &x423);
+{ uint64_t x426; uint8_t x427 = _addcarryx_u64(x424, x409, x411, &x426);
+{ uint64_t x429; uint8_t x430 = _addcarryx_u64(x427, x412, x414, &x429);
+{ uint64_t x432; uint8_t x433 = _addcarryx_u64(x430, x415, x417, &x432);
+{ uint64_t x435; uint8_t x436 = _addcarryx_u64(x433, x418, x420, &x435);
+{ uint64_t x438; uint8_t _ = _addcarryx_u64(0x0, x436, x421, &x438);
+{ uint64_t _; uint8_t x442 = _addcarryx_u64(0x0, x384, x405, &_);
+{ uint64_t x444; uint8_t x445 = _addcarryx_u64(x442, x387, x423, &x444);
+{ uint64_t x447; uint8_t x448 = _addcarryx_u64(x445, x390, x426, &x447);
+{ uint64_t x450; uint8_t x451 = _addcarryx_u64(x448, x393, x429, &x450);
+{ uint64_t x453; uint8_t x454 = _addcarryx_u64(x451, x396, x432, &x453);
+{ uint64_t x456; uint8_t x457 = _addcarryx_u64(x454, x399, x435, &x456);
+{ uint64_t x459; uint8_t x460 = _addcarryx_u64(x457, x402, x438, &x459);
+{ uint8_t x461 = (x460 + x403); // Merge the top carries of this round's two 7-word additions.
+{ uint64_t x464; uint64_t x463 = _mulx_u64(x13, x15, &x464); // Round 5: multiplier word x13.
+{ uint64_t x467; uint64_t x466 = _mulx_u64(x13, x17, &x467);
+{ uint64_t x470; uint64_t x469 = _mulx_u64(x13, x19, &x470);
+{ uint64_t x473; uint64_t x472 = _mulx_u64(x13, x21, &x473);
+{ uint64_t x476; uint64_t x475 = _mulx_u64(x13, x23, &x476);
+{ uint64_t x479; uint64_t x478 = _mulx_u64(x13, x22, &x479);
+{ uint64_t x481; uint8_t x482 = _addcarryx_u64(0x0, x464, x466, &x481);
+{ uint64_t x484; uint8_t x485 = _addcarryx_u64(x482, x467, x469, &x484);
+{ uint64_t x487; uint8_t x488 = _addcarryx_u64(x485, x470, x472, &x487);
+{ uint64_t x490; uint8_t x491 = _addcarryx_u64(x488, x473, x475, &x490);
+{ uint64_t x493; uint8_t x494 = _addcarryx_u64(x491, x476, x478, &x493);
+{ uint64_t x496; uint8_t _ = _addcarryx_u64(0x0, x494, x479, &x496);
+{ uint64_t x499; uint8_t x500 = _addcarryx_u64(0x0, x444, x463, &x499);
+{ uint64_t x502; uint8_t x503 = _addcarryx_u64(x500, x447, x481, &x502);
+{ uint64_t x505; uint8_t x506 = _addcarryx_u64(x503, x450, x484, &x505);
+{ uint64_t x508; uint8_t x509 = _addcarryx_u64(x506, x453, x487, &x508);
+{ uint64_t x511; uint8_t x512 = _addcarryx_u64(x509, x456, x490, &x511);
+{ uint64_t x514; uint8_t x515 = _addcarryx_u64(x512, x459, x493, &x514);
+{ uint64_t x517; uint8_t x518 = _addcarryx_u64(x515, x461, x496, &x517);
+{ uint64_t x521; uint64_t x520 = _mulx_u64(x499, 0xffffffffffffffffL, &x521); // Reduction step on the new low word x499.
+{ uint64_t x524; uint64_t x523 = _mulx_u64(x499, 0xffffffffffffffffL, &x524);
+{ uint64_t x527; uint64_t x526 = _mulx_u64(x499, 0xffffffffffffffffL, &x527);
+{ uint64_t x530; uint64_t x529 = _mulx_u64(x499, 0xffffffffffffffffL, &x530);
+{ uint64_t x533; uint64_t x532 = _mulx_u64(x499, 0xffffffffffffffffL, &x533);
+{ uint64_t x536; uint64_t x535 = _mulx_u64(x499, 0xb0ffffffffffffffL, &x536);
+{ uint64_t x538; uint8_t x539 = _addcarryx_u64(0x0, x521, x523, &x538);
+{ uint64_t x541; uint8_t x542 = _addcarryx_u64(x539, x524, x526, &x541);
+{ uint64_t x544; uint8_t x545 = _addcarryx_u64(x542, x527, x529, &x544);
+{ uint64_t x547; uint8_t x548 = _addcarryx_u64(x545, x530, x532, &x547);
+{ uint64_t x550; uint8_t x551 = _addcarryx_u64(x548, x533, x535, &x550);
+{ uint64_t x553; uint8_t _ = _addcarryx_u64(0x0, x551, x536, &x553);
+{ uint64_t _; uint8_t x557 = _addcarryx_u64(0x0, x499, x520, &_);
+{ uint64_t x559; uint8_t x560 = _addcarryx_u64(x557, x502, x538, &x559);
+{ uint64_t x562; uint8_t x563 = _addcarryx_u64(x560, x505, x541, &x562);
+{ uint64_t x565; uint8_t x566 = _addcarryx_u64(x563, x508, x544, &x565);
+{ uint64_t x568; uint8_t x569 = _addcarryx_u64(x566, x511, x547, &x568);
+{ uint64_t x571; uint8_t x572 = _addcarryx_u64(x569, x514, x550, &x571);
+{ uint64_t x574; uint8_t x575 = _addcarryx_u64(x572, x517, x553, &x574);
+{ uint8_t x576 = (x575 + x518); // Merge the top carries of this round's two 7-word additions.
+{ uint64_t x579; uint64_t x578 = _mulx_u64(x12, x15, &x579); // Round 6 (last): multiplier word x12.
+{ uint64_t x582; uint64_t x581 = _mulx_u64(x12, x17, &x582);
+{ uint64_t x585; uint64_t x584 = _mulx_u64(x12, x19, &x585);
+{ uint64_t x588; uint64_t x587 = _mulx_u64(x12, x21, &x588);
+{ uint64_t x591; uint64_t x590 = _mulx_u64(x12, x23, &x591);
+{ uint64_t x594; uint64_t x593 = _mulx_u64(x12, x22, &x594);
+{ uint64_t x596; uint8_t x597 = _addcarryx_u64(0x0, x579, x581, &x596);
+{ uint64_t x599; uint8_t x600 = _addcarryx_u64(x597, x582, x584, &x599);
+{ uint64_t x602; uint8_t x603 = _addcarryx_u64(x600, x585, x587, &x602);
+{ uint64_t x605; uint8_t x606 = _addcarryx_u64(x603, x588, x590, &x605);
+{ uint64_t x608; uint8_t x609 = _addcarryx_u64(x606, x591, x593, &x608);
+{ uint64_t x611; uint8_t _ = _addcarryx_u64(0x0, x609, x594, &x611);
+{ uint64_t x614; uint8_t x615 = _addcarryx_u64(0x0, x559, x578, &x614);
+{ uint64_t x617; uint8_t x618 = _addcarryx_u64(x615, x562, x596, &x617);
+{ uint64_t x620; uint8_t x621 = _addcarryx_u64(x618, x565, x599, &x620);
+{ uint64_t x623; uint8_t x624 = _addcarryx_u64(x621, x568, x602, &x623);
+{ uint64_t x626; uint8_t x627 = _addcarryx_u64(x624, x571, x605, &x626);
+{ uint64_t x629; uint8_t x630 = _addcarryx_u64(x627, x574, x608, &x629);
+{ uint64_t x632; uint8_t x633 = _addcarryx_u64(x630, x576, x611, &x632);
+{ uint64_t x636; uint64_t x635 = _mulx_u64(x614, 0xffffffffffffffffL, &x636); // Reduction step on the new low word x614.
+{ uint64_t x639; uint64_t x638 = _mulx_u64(x614, 0xffffffffffffffffL, &x639);
+{ uint64_t x642; uint64_t x641 = _mulx_u64(x614, 0xffffffffffffffffL, &x642);
+{ uint64_t x645; uint64_t x644 = _mulx_u64(x614, 0xffffffffffffffffL, &x645);
+{ uint64_t x648; uint64_t x647 = _mulx_u64(x614, 0xffffffffffffffffL, &x648);
+{ uint64_t x651; uint64_t x650 = _mulx_u64(x614, 0xb0ffffffffffffffL, &x651);
+{ uint64_t x653; uint8_t x654 = _addcarryx_u64(0x0, x636, x638, &x653);
+{ uint64_t x656; uint8_t x657 = _addcarryx_u64(x654, x639, x641, &x656);
+{ uint64_t x659; uint8_t x660 = _addcarryx_u64(x657, x642, x644, &x659);
+{ uint64_t x662; uint8_t x663 = _addcarryx_u64(x660, x645, x647, &x662);
+{ uint64_t x665; uint8_t x666 = _addcarryx_u64(x663, x648, x650, &x665);
+{ uint64_t x668; uint8_t _ = _addcarryx_u64(0x0, x666, x651, &x668);
+{ uint64_t _; uint8_t x672 = _addcarryx_u64(0x0, x614, x635, &_);
+{ uint64_t x674; uint8_t x675 = _addcarryx_u64(x672, x617, x653, &x674);
+{ uint64_t x677; uint8_t x678 = _addcarryx_u64(x675, x620, x656, &x677);
+{ uint64_t x680; uint8_t x681 = _addcarryx_u64(x678, x623, x659, &x680);
+{ uint64_t x683; uint8_t x684 = _addcarryx_u64(x681, x626, x662, &x683);
+{ uint64_t x686; uint8_t x687 = _addcarryx_u64(x684, x629, x665, &x686);
+{ uint64_t x689; uint8_t x690 = _addcarryx_u64(x687, x632, x668, &x689);
+{ uint8_t x691 = (x690 + x633); // Overflow word above the 6-word result x674, x677, x680, x683, x686, x689.
+{ uint64_t x693; uint8_t x694 = _subborrow_u64(0x0, x674, 0xffffffffffffffffL, &x693); // Trial subtraction of the same six constant limbs from the result, lowest word first.
+{ uint64_t x696; uint8_t x697 = _subborrow_u64(x694, x677, 0xffffffffffffffffL, &x696);
+{ uint64_t x699; uint8_t x700 = _subborrow_u64(x697, x680, 0xffffffffffffffffL, &x699);
+{ uint64_t x702; uint8_t x703 = _subborrow_u64(x700, x683, 0xffffffffffffffffL, &x702);
+{ uint64_t x705; uint8_t x706 = _subborrow_u64(x703, x686, 0xffffffffffffffffL, &x705);
+{ uint64_t x708; uint8_t x709 = _subborrow_u64(x706, x689, 0xb0ffffffffffffffL, &x708);
+{ uint64_t _; uint8_t x712 = _subborrow_u64(x709, x691, 0x0, &_); // x712 is the final borrow through the overflow word; the difference word itself is discarded.
+{ uint64_t x713 = cmovznz(x712, x708, x689); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument when x712 == 0 and the 3rd otherwise - TODO confirm.
+{ uint64_t x714 = cmovznz(x712, x705, x686);
+{ uint64_t x715 = cmovznz(x712, x702, x683);
+{ uint64_t x716 = cmovznz(x712, x699, x680);
+{ uint64_t x717 = cmovznz(x712, x696, x677);
+{ uint64_t x718 = cmovznz(x712, x693, x674);
+out[0] = x713; // Output order is reversed relative to the carry chain: out[0] holds the top word, out[5] the lowest.
+out[1] = x714;
+out[2] = x715;
+out[3] = x716;
+out[4] = x717;
+out[5] = x718;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; (six result words, top word in out[0])
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/femul.h b/src/Specific/montgomery64_2e384m79x2e376m1/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/fenz.c b/src/Specific/montgomery64_2e384m79x2e376m1/fenz.c
new file mode 100644
index 000000000..ad3763a14
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/fenz.c
@@ -0,0 +1,27 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores the bitwise OR of all six input words in out[0].
+{ uint64_t x11 = (x10 | x9); // Accumulate the OR one word at a time, starting with x10 and x9.
+{ uint64_t x12 = (x8 | x11);
+{ uint64_t x13 = (x6 | x12);
+{ uint64_t x14 = (x4 | x13);
+{ uint64_t x15 = (x2 | x14);
+out[0] = x15; // Zero exactly when every input word is zero; otherwise some non-zero bit pattern (not normalized to 1).
+}}}}}
+// caller: uint64_t out[1]; (out[0] == 0 iff all six inputs are 0)
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/fenz.h b/src/Specific/montgomery64_2e384m79x2e376m1/fenz.h
new file mode 100644
index 000000000..c6d44e4a7
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/feopp.c b/src/Specific/montgomery64_2e384m79x2e376m1/feopp.c
new file mode 100644
index 000000000..eb7d95bb5
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/feopp.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feopp.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Negation: computes 0 - (x2, x4, x6, x8, x10, x9) word by word, then adds back masked constant limbs.
+{ uint64_t x12; uint8_t x13 = _subborrow_u64(0x0, 0x0, x2, &x12); // Borrow chain for 0 - input; x2 is the lowest word, x9 the highest.
+{ uint64_t x15; uint8_t x16 = _subborrow_u64(x13, 0x0, x4, &x15);
+{ uint64_t x18; uint8_t x19 = _subborrow_u64(x16, 0x0, x6, &x18);
+{ uint64_t x21; uint8_t x22 = _subborrow_u64(x19, 0x0, x8, &x21);
+{ uint64_t x24; uint8_t x25 = _subborrow_u64(x22, 0x0, x10, &x24);
+{ uint64_t x27; uint8_t x28 = _subborrow_u64(x25, 0x0, x9, &x27);
+{ uint64_t x29 = (uint64_t)cmovznz(x28, 0x0, 0xffffffffffffffffL); // Mask from the final borrow x28; cmovznz is from liblow.h (not shown), presumably 0 when x28 == 0 and all-ones otherwise - TODO confirm.
+{ uint64_t x30 = (x29 & 0xffffffffffffffffL); // Each "mask & constant" below is either 0 or one limb of the constant 0xb0ff..ff : 0xff..ff (x5).
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(0x0, x12, x30, &x32); // Carry chain adding the masked limbs back, lowest word first.
+{ uint64_t x34 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x33, x15, x34, &x36);
+{ uint64_t x38 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x37, x18, x38, &x40);
+{ uint64_t x42 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x41, x21, x42, &x44);
+{ uint64_t x46 = (x29 & 0xffffffffffffffffL);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x45, x24, x46, &x48);
+{ uint64_t x50 = (x29 & 0xb0ffffffffffffffL); // Top limb of the constant.
+{ uint64_t x52; uint8_t _ = _addcarryx_u64(x49, x27, x50, &x52); // The carry out of the top word is discarded as "_".
+out[0] = x52; // Output order is reversed relative to the carry chain: out[0] holds the top word, out[5] the lowest.
+out[1] = x48;
+out[2] = x44;
+out[3] = x40;
+out[4] = x36;
+out[5] = x32;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; (six result words, top word in out[0])
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/feopp.h b/src/Specific/montgomery64_2e384m79x2e376m1/feopp.h
new file mode 100644
index 000000000..4fd8feee4
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/feopp.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feopp(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/fesub.c b/src/Specific/montgomery64_2e384m79x2e376m1/fesub.c
new file mode 100644
index 000000000..5946b461b
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/fesub.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesub.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Subtraction: (x5, x7, x9, x11, x13, x12) - (x15, x17, x19, x21, x23, x22) word by word, then adds back masked constant limbs.
+{ uint64_t x25; uint8_t x26 = _subborrow_u64(0x0, x5, x15, &x25); // Borrow chain; x5/x15 are the lowest words, x12/x22 the highest.
+{ uint64_t x28; uint8_t x29 = _subborrow_u64(x26, x7, x17, &x28);
+{ uint64_t x31; uint8_t x32 = _subborrow_u64(x29, x9, x19, &x31);
+{ uint64_t x34; uint8_t x35 = _subborrow_u64(x32, x11, x21, &x34);
+{ uint64_t x37; uint8_t x38 = _subborrow_u64(x35, x13, x23, &x37);
+{ uint64_t x40; uint8_t x41 = _subborrow_u64(x38, x12, x22, &x40);
+{ uint64_t x42 = (uint64_t)cmovznz(x41, 0x0, 0xffffffffffffffffL); // Mask from the final borrow x41; cmovznz is from liblow.h (not shown), presumably 0 when x41 == 0 and all-ones otherwise - TODO confirm.
+{ uint64_t x43 = (x42 & 0xffffffffffffffffL); // Each "mask & constant" below is either 0 or one limb of the constant 0xb0ff..ff : 0xff..ff (x5).
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(0x0, x25, x43, &x45); // Carry chain adding the masked limbs back, lowest word first.
+{ uint64_t x47 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x46, x28, x47, &x49);
+{ uint64_t x51 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x53; uint8_t x54 = _addcarryx_u64(x50, x31, x51, &x53);
+{ uint64_t x55 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x57; uint8_t x58 = _addcarryx_u64(x54, x34, x55, &x57);
+{ uint64_t x59 = (x42 & 0xffffffffffffffffL);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x58, x37, x59, &x61);
+{ uint64_t x63 = (x42 & 0xb0ffffffffffffffL); // Top limb of the constant.
+{ uint64_t x65; uint8_t _ = _addcarryx_u64(x62, x40, x63, &x65); // The carry out of the top word is discarded as "_".
+out[0] = x65; // Output order is reversed relative to the carry chain: out[0] holds the top word, out[5] the lowest.
+out[1] = x61;
+out[2] = x57;
+out[3] = x53;
+out[4] = x49;
+out[5] = x45;
+}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; (six result words, top word in out[0])
diff --git a/src/Specific/montgomery64_2e384m79x2e376m1/fesub.h b/src/Specific/montgomery64_2e384m79x2e376m1/fesub.h
new file mode 100644
index 000000000..e9c4bd677
--- /dev/null
+++ b/src/Specific/montgomery64_2e384m79x2e376m1/fesub.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesub(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/montgomery64_2e389m21/feadd.c b/src/Specific/montgomery64_2e389m21/feadd.c
new file mode 100644
index 000000000..dacadb407
--- /dev/null
+++ b/src/Specific/montgomery64_2e389m21/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Addition: (x5, x7, x9, x11, x13, x15, x14) + (x17, x19, x21, x23, x25, x27, x26) word by word, followed by a trial subtraction of a 7-limb constant.
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29); // Carry chain; x5/x17 are the lowest words, x14/x26 the highest.
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47); // x48 is the carry out of the top word.
+{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffebL, &x50); // Trial subtraction of the constant 0x1f : 0xff..ff (x5) : 0xff..ffeb, i.e. 2^389 - 21, lowest limb first.
+{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1f, &x68);
+{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_); // x72 is the final borrow through the carry word x48; the difference word itself is discarded.
+{ uint64_t x73 = cmovznz(x72, x68, x47); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument when x72 == 0 and the 3rd otherwise - TODO confirm.
+{ uint64_t x74 = cmovznz(x72, x65, x44);
+{ uint64_t x75 = cmovznz(x72, x62, x41);
+{ uint64_t x76 = cmovznz(x72, x59, x38);
+{ uint64_t x77 = cmovznz(x72, x56, x35);
+{ uint64_t x78 = cmovznz(x72, x53, x32);
+{ uint64_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // Output order is reversed relative to the carry chain: out[0] holds the top word, out[6] the lowest.
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79;
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7]; (seven result words, top word in out[0])
diff --git a/src/Specific/montgomery64_2e389m21/feadd.h b/src/Specific/montgomery64_2e389m21/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery64_2e389m21/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17);
diff --git a/src/Specific/montgomery64_2e389m21/fenz.c b/src/Specific/montgomery64_2e389m21/fenz.c
new file mode 100644
index 000000000..428b446d0
--- /dev/null
+++ b/src/Specific/montgomery64_2e389m21/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Non-zero test: stores the bitwise OR of all seven input words in out[0].
+{ uint64_t x13 = (x12 | x11); // Accumulate the OR one word at a time, starting with x12 and x11.
+{ uint64_t x14 = (x10 | x13);
+{ uint64_t x15 = (x8 | x14);
+{ uint64_t x16 = (x6 | x15);
+{ uint64_t x17 = (x4 | x16);
+{ uint64_t x18 = (x2 | x17);
+out[0] = x18; // Zero exactly when every input word is zero; otherwise some non-zero bit pattern (not normalized to 1).
+}}}}}}
+// caller: uint64_t out[1]; (out[0] == 0 iff all seven inputs are 0)
diff --git a/src/Specific/montgomery64_2e389m21/fenz.h b/src/Specific/montgomery64_2e389m21/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery64_2e389m21/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e401m31/feadd.c b/src/Specific/montgomery64_2e401m31/feadd.c
new file mode 100644
index 000000000..d6917e255
--- /dev/null
+++ b/src/Specific/montgomery64_2e401m31/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Addition: (x5, x7, x9, x11, x13, x15, x14) + (x17, x19, x21, x23, x25, x27, x26) word by word, followed by a trial subtraction of a 7-limb constant.
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29); // Carry chain; x5/x17 are the lowest words, x14/x26 the highest.
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47); // x48 is the carry out of the top word.
+{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffe1L, &x50); // Trial subtraction of the constant 0x1ffff : 0xff..ff (x5) : 0xff..ffe1, i.e. 2^401 - 31, lowest limb first.
+{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1ffff, &x68);
+{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_); // x72 is the final borrow through the carry word x48; the difference word itself is discarded.
+{ uint64_t x73 = cmovznz(x72, x68, x47); // cmovznz is declared in liblow.h (not shown); presumably yields the 2nd argument when x72 == 0 and the 3rd otherwise - TODO confirm.
+{ uint64_t x74 = cmovznz(x72, x65, x44);
+{ uint64_t x75 = cmovznz(x72, x62, x41);
+{ uint64_t x76 = cmovznz(x72, x59, x38);
+{ uint64_t x77 = cmovznz(x72, x56, x35);
+{ uint64_t x78 = cmovznz(x72, x53, x32);
+{ uint64_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // Output order is reversed relative to the carry chain: out[0] holds the top word, out[6] the lowest.
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79;
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7]; (seven result words, top word in out[0])
diff --git a/src/Specific/montgomery64_2e401m31/feadd.h b/src/Specific/montgomery64_2e401m31/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery64_2e401m31/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // defined in feadd.c; takes two 7-word operands by value and writes out[0..6]
diff --git a/src/Specific/montgomery64_2e401m31/fenz.c b/src/Specific/montgomery64_2e401m31/fenz.c
new file mode 100644
index 000000000..428b446d0
--- /dev/null
+++ b/src/Specific/montgomery64_2e401m31/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven input words together and stores the single result word in out[0].
+{ uint64_t x13 = (x12 | x11);
+{ uint64_t x14 = (x10 | x13);
+{ uint64_t x15 = (x8 | x14);
+{ uint64_t x16 = (x6 | x15);
+{ uint64_t x17 = (x4 | x16);
+{ uint64_t x18 = (x2 | x17);
+out[0] = x18; // zero exactly when every input word is zero; no branches are taken to compute it
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e401m31/fenz.h b/src/Specific/montgomery64_2e401m31/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery64_2e401m31/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // defined in fenz.c; writes the OR of the seven words to out[0]
diff --git a/src/Specific/montgomery64_2e413m21/feadd.c b/src/Specific/montgomery64_2e413m21/feadd.c
new file mode 100644
index 000000000..50ddce7b2
--- /dev/null
+++ b/src/Specific/montgomery64_2e413m21/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Sums two 7-word operands with carry, then selects between that sum and (sum - constant); writes 7 words to out.
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29); // carry chain starts at the last parameter of each operand (x5, x17) with carry-in 0
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47); // final step pairs the first parameters (x14, x26); x48 is the carry out of the 7-word sum
+{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffebL, &x50); // borrow chain: subtract the constant whose words, in chain order, spell 2^413 - 21
+{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x1fffffff, &x68);
+{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_); // x72 = 1 exactly when (carry x48 : 7-word sum) is smaller than the constant; the difference word _ is unused
+{ uint64_t x73 = cmovznz(x72, x68, x47); // cmovznz is declared in liblow.h (not shown here); presumably returns x68 when x72 == 0 and x47 otherwise -- TODO confirm
+{ uint64_t x74 = cmovznz(x72, x65, x44);
+{ uint64_t x75 = cmovznz(x72, x62, x41);
+{ uint64_t x76 = cmovznz(x72, x59, x38);
+{ uint64_t x77 = cmovznz(x72, x56, x35);
+{ uint64_t x78 = cmovznz(x72, x53, x32);
+{ uint64_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // out[0] holds the word from the last chain step (x47/x68), so out[] is ordered most-significant word first
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // out[6] holds the word from the first chain step (x29/x50)
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery64_2e413m21/feadd.h b/src/Specific/montgomery64_2e413m21/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery64_2e413m21/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // defined in feadd.c; takes two 7-word operands by value and writes out[0..6]
diff --git a/src/Specific/montgomery64_2e413m21/fenz.c b/src/Specific/montgomery64_2e413m21/fenz.c
new file mode 100644
index 000000000..428b446d0
--- /dev/null
+++ b/src/Specific/montgomery64_2e413m21/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven input words together and stores the single result word in out[0].
+{ uint64_t x13 = (x12 | x11);
+{ uint64_t x14 = (x10 | x13);
+{ uint64_t x15 = (x8 | x14);
+{ uint64_t x16 = (x6 | x15);
+{ uint64_t x17 = (x4 | x16);
+{ uint64_t x18 = (x2 | x17);
+out[0] = x18; // zero exactly when every input word is zero; no branches are taken to compute it
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e413m21/fenz.h b/src/Specific/montgomery64_2e413m21/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery64_2e413m21/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // defined in fenz.c; writes the OR of the seven words to out[0]
diff --git a/src/Specific/montgomery64_2e414m17/feadd.c b/src/Specific/montgomery64_2e414m17/feadd.c
new file mode 100644
index 000000000..2687b8408
--- /dev/null
+++ b/src/Specific/montgomery64_2e414m17/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Sums two 7-word operands with carry, then selects between that sum and (sum - constant); writes 7 words to out.
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29); // carry chain starts at the last parameter of each operand (x5, x17) with carry-in 0
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47); // final step pairs the first parameters (x14, x26); x48 is the carry out of the 7-word sum
+{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffefL, &x50); // borrow chain: subtract the constant whose words, in chain order, spell 2^414 - 17
+{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0x3fffffff, &x68);
+{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_); // x72 = 1 exactly when (carry x48 : 7-word sum) is smaller than the constant; the difference word _ is unused
+{ uint64_t x73 = cmovznz(x72, x68, x47); // cmovznz is declared in liblow.h (not shown here); presumably returns x68 when x72 == 0 and x47 otherwise -- TODO confirm
+{ uint64_t x74 = cmovznz(x72, x65, x44);
+{ uint64_t x75 = cmovznz(x72, x62, x41);
+{ uint64_t x76 = cmovznz(x72, x59, x38);
+{ uint64_t x77 = cmovznz(x72, x56, x35);
+{ uint64_t x78 = cmovznz(x72, x53, x32);
+{ uint64_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // out[0] holds the word from the last chain step (x47/x68), so out[] is ordered most-significant word first
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // out[6] holds the word from the first chain step (x29/x50)
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery64_2e414m17/feadd.h b/src/Specific/montgomery64_2e414m17/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery64_2e414m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // defined in feadd.c; takes two 7-word operands by value and writes out[0..6]
diff --git a/src/Specific/montgomery64_2e414m17/fenz.c b/src/Specific/montgomery64_2e414m17/fenz.c
new file mode 100644
index 000000000..428b446d0
--- /dev/null
+++ b/src/Specific/montgomery64_2e414m17/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven input words together and stores the single result word in out[0].
+{ uint64_t x13 = (x12 | x11);
+{ uint64_t x14 = (x10 | x13);
+{ uint64_t x15 = (x8 | x14);
+{ uint64_t x16 = (x6 | x15);
+{ uint64_t x17 = (x4 | x16);
+{ uint64_t x18 = (x2 | x17);
+out[0] = x18; // zero exactly when every input word is zero; no branches are taken to compute it
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e414m17/fenz.h b/src/Specific/montgomery64_2e414m17/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery64_2e414m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // defined in fenz.c; writes the OR of the seven words to out[0]
diff --git a/src/Specific/montgomery64_2e416m2e208m1/feadd.c b/src/Specific/montgomery64_2e416m2e208m1/feadd.c
new file mode 100644
index 000000000..aff57e2db
--- /dev/null
+++ b/src/Specific/montgomery64_2e416m2e208m1/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Sums two 7-word operands with carry, then selects between that sum and (sum - constant); writes 7 words to out.
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29); // carry chain starts at the last parameter of each operand (x5, x17) with carry-in 0
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47); // final step pairs the first parameters (x14, x26); x48 is the carry out of the 7-word sum
+{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffffL, &x50); // borrow chain: subtract the constant whose words, in chain order, spell 2^416 - 2^208 - 1
+{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xfffffffffffeffffL, &x59); // fourth word has bit 16 clear, i.e. bit 208 of the whole constant
+{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xffffffff, &x68);
+{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_); // x72 = 1 exactly when (carry x48 : 7-word sum) is smaller than the constant; the difference word _ is unused
+{ uint64_t x73 = cmovznz(x72, x68, x47); // cmovznz is declared in liblow.h (not shown here); presumably returns x68 when x72 == 0 and x47 otherwise -- TODO confirm
+{ uint64_t x74 = cmovznz(x72, x65, x44);
+{ uint64_t x75 = cmovznz(x72, x62, x41);
+{ uint64_t x76 = cmovznz(x72, x59, x38);
+{ uint64_t x77 = cmovznz(x72, x56, x35);
+{ uint64_t x78 = cmovznz(x72, x53, x32);
+{ uint64_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // out[0] holds the word from the last chain step (x47/x68), so out[] is ordered most-significant word first
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // out[6] holds the word from the first chain step (x29/x50)
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery64_2e416m2e208m1/feadd.h b/src/Specific/montgomery64_2e416m2e208m1/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery64_2e416m2e208m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // defined in feadd.c; takes two 7-word operands by value and writes out[0..6]
diff --git a/src/Specific/montgomery64_2e416m2e208m1/fenz.c b/src/Specific/montgomery64_2e416m2e208m1/fenz.c
new file mode 100644
index 000000000..428b446d0
--- /dev/null
+++ b/src/Specific/montgomery64_2e416m2e208m1/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven input words together and stores the single result word in out[0].
+{ uint64_t x13 = (x12 | x11);
+{ uint64_t x14 = (x10 | x13);
+{ uint64_t x15 = (x8 | x14);
+{ uint64_t x16 = (x6 | x15);
+{ uint64_t x17 = (x4 | x16);
+{ uint64_t x18 = (x2 | x17);
+out[0] = x18; // zero exactly when every input word is zero; no branches are taken to compute it
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e416m2e208m1/fenz.h b/src/Specific/montgomery64_2e416m2e208m1/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery64_2e416m2e208m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // defined in fenz.c; writes the OR of the seven words to out[0]
diff --git a/src/Specific/montgomery64_2e444m17/feadd.c b/src/Specific/montgomery64_2e444m17/feadd.c
new file mode 100644
index 000000000..264e08caa
--- /dev/null
+++ b/src/Specific/montgomery64_2e444m17/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Sums two 7-word operands with carry, then selects between that sum and (sum - constant); writes 7 words to out.
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29); // carry chain starts at the last parameter of each operand (x5, x17) with carry-in 0
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47); // final step pairs the first parameters (x14, x26); x48 is the carry out of the 7-word sum
+{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffefL, &x50); // borrow chain: subtract the constant whose words, in chain order, spell 2^444 - 17
+{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xffffffffffffffffL, &x59);
+{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xfffffffffffffff, &x68);
+{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_); // x72 = 1 exactly when (carry x48 : 7-word sum) is smaller than the constant; the difference word _ is unused
+{ uint64_t x73 = cmovznz(x72, x68, x47); // cmovznz is declared in liblow.h (not shown here); presumably returns x68 when x72 == 0 and x47 otherwise -- TODO confirm
+{ uint64_t x74 = cmovznz(x72, x65, x44);
+{ uint64_t x75 = cmovznz(x72, x62, x41);
+{ uint64_t x76 = cmovznz(x72, x59, x38);
+{ uint64_t x77 = cmovznz(x72, x56, x35);
+{ uint64_t x78 = cmovznz(x72, x53, x32);
+{ uint64_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // out[0] holds the word from the last chain step (x47/x68), so out[] is ordered most-significant word first
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // out[6] holds the word from the first chain step (x29/x50)
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery64_2e444m17/feadd.h b/src/Specific/montgomery64_2e444m17/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery64_2e444m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // defined in feadd.c; takes two 7-word operands by value and writes out[0..6]
diff --git a/src/Specific/montgomery64_2e444m17/fenz.c b/src/Specific/montgomery64_2e444m17/fenz.c
new file mode 100644
index 000000000..428b446d0
--- /dev/null
+++ b/src/Specific/montgomery64_2e444m17/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven input words together and stores the single result word in out[0].
+{ uint64_t x13 = (x12 | x11);
+{ uint64_t x14 = (x10 | x13);
+{ uint64_t x15 = (x8 | x14);
+{ uint64_t x16 = (x6 | x15);
+{ uint64_t x17 = (x4 | x16);
+{ uint64_t x18 = (x2 | x17);
+out[0] = x18; // zero exactly when every input word is zero; no branches are taken to compute it
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e444m17/fenz.h b/src/Specific/montgomery64_2e444m17/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery64_2e444m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // defined in fenz.c; writes the OR of the seven words to out[0]
diff --git a/src/Specific/montgomery64_2e448m2e224m1/feadd.c b/src/Specific/montgomery64_2e448m2e224m1/feadd.c
new file mode 100644
index 000000000..8b9e9f114
--- /dev/null
+++ b/src/Specific/montgomery64_2e448m2e224m1/feadd.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Sums two 7-word operands with carry, then selects between that sum and (sum - constant); writes 7 words to out.
+{ uint64_t x29; uint8_t x30 = _addcarryx_u64(0x0, x5, x17, &x29); // carry chain starts at the last parameter of each operand (x5, x17) with carry-in 0
+{ uint64_t x32; uint8_t x33 = _addcarryx_u64(x30, x7, x19, &x32);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u64(x33, x9, x21, &x35);
+{ uint64_t x38; uint8_t x39 = _addcarryx_u64(x36, x11, x23, &x38);
+{ uint64_t x41; uint8_t x42 = _addcarryx_u64(x39, x13, x25, &x41);
+{ uint64_t x44; uint8_t x45 = _addcarryx_u64(x42, x15, x27, &x44);
+{ uint64_t x47; uint8_t x48 = _addcarryx_u64(x45, x14, x26, &x47); // final step pairs the first parameters (x14, x26); x48 is the carry out of the 7-word sum
+{ uint64_t x50; uint8_t x51 = _subborrow_u64(0x0, x29, 0xffffffffffffffffL, &x50); // borrow chain: subtract the constant whose words, in chain order, spell 2^448 - 2^224 - 1
+{ uint64_t x53; uint8_t x54 = _subborrow_u64(x51, x32, 0xffffffffffffffffL, &x53);
+{ uint64_t x56; uint8_t x57 = _subborrow_u64(x54, x35, 0xffffffffffffffffL, &x56);
+{ uint64_t x59; uint8_t x60 = _subborrow_u64(x57, x38, 0xfffffffeffffffffL, &x59); // fourth word has bit 32 clear, i.e. bit 224 of the whole constant
+{ uint64_t x62; uint8_t x63 = _subborrow_u64(x60, x41, 0xffffffffffffffffL, &x62);
+{ uint64_t x65; uint8_t x66 = _subborrow_u64(x63, x44, 0xffffffffffffffffL, &x65);
+{ uint64_t x68; uint8_t x69 = _subborrow_u64(x66, x47, 0xffffffffffffffffL, &x68);
+{ uint64_t _; uint8_t x72 = _subborrow_u64(x69, x48, 0x0, &_); // x72 = 1 exactly when (carry x48 : 7-word sum) is smaller than the constant; the difference word _ is unused
+{ uint64_t x73 = cmovznz(x72, x68, x47); // cmovznz is declared in liblow.h (not shown here); presumably returns x68 when x72 == 0 and x47 otherwise -- TODO confirm
+{ uint64_t x74 = cmovznz(x72, x65, x44);
+{ uint64_t x75 = cmovznz(x72, x62, x41);
+{ uint64_t x76 = cmovznz(x72, x59, x38);
+{ uint64_t x77 = cmovznz(x72, x56, x35);
+{ uint64_t x78 = cmovznz(x72, x53, x32);
+{ uint64_t x79 = cmovznz(x72, x50, x29);
+out[0] = x73; // out[0] holds the word from the last chain step (x47/x68), so out[] is ordered most-significant word first
+out[1] = x74;
+out[2] = x75;
+out[3] = x76;
+out[4] = x77;
+out[5] = x78;
+out[6] = x79; // out[6] holds the word from the first chain step (x29/x50)
+}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/montgomery64_2e448m2e224m1/feadd.h b/src/Specific/montgomery64_2e448m2e224m1/feadd.h
new file mode 100644
index 000000000..9e96b396b
--- /dev/null
+++ b/src/Specific/montgomery64_2e448m2e224m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // defined in feadd.c; takes two 7-word operands by value and writes out[0..6]
diff --git a/src/Specific/montgomery64_2e448m2e224m1/fenz.c b/src/Specific/montgomery64_2e448m2e224m1/fenz.c
new file mode 100644
index 000000000..428b446d0
--- /dev/null
+++ b/src/Specific/montgomery64_2e448m2e224m1/fenz.c
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the seven input words together and stores the single result word in out[0].
+{ uint64_t x13 = (x12 | x11);
+{ uint64_t x14 = (x10 | x13);
+{ uint64_t x15 = (x8 | x14);
+{ uint64_t x16 = (x6 | x15);
+{ uint64_t x17 = (x4 | x16);
+{ uint64_t x18 = (x2 | x17);
+out[0] = x18; // zero exactly when every input word is zero; no branches are taken to compute it
+}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e448m2e224m1/fenz.h b/src/Specific/montgomery64_2e448m2e224m1/fenz.h
new file mode 100644
index 000000000..2cafcda9d
--- /dev/null
+++ b/src/Specific/montgomery64_2e448m2e224m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // defined in fenz.c; writes the OR of the seven words to out[0]
diff --git a/src/Specific/montgomery64_2e450m2e225m1/feadd.c b/src/Specific/montgomery64_2e450m2e225m1/feadd.c
new file mode 100644
index 000000000..2e0e920dc
--- /dev/null
+++ b/src/Specific/montgomery64_2e450m2e225m1/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Sums two 8-word operands with carry, then selects between that sum and (sum - constant); writes 8 words to out.
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33); // carry chain starts at the last parameter of each operand (x5, x19) with carry-in 0
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54); // final step pairs the first parameters (x16, x30); x55 is the carry out of the 8-word sum
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57); // borrow chain: subtract the constant whose words, in chain order, spell 2^450 - 2^225 - 1
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xfffffffdffffffffL, &x66); // fourth word has bit 33 clear, i.e. bit 225 of the whole constant
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x3, &x78);
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_); // x82 = 1 exactly when (carry x55 : 8-word sum) is smaller than the constant; the difference word _ is unused
+{ uint64_t x83 = cmovznz(x82, x78, x54); // cmovznz is declared in liblow.h (not shown here); presumably returns x78 when x82 == 0 and x54 otherwise -- TODO confirm
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // out[0] holds the word from the last chain step (x54/x78), so out[] is ordered most-significant word first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // out[7] holds the word from the first chain step (x33/x57)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e450m2e225m1/feadd.h b/src/Specific/montgomery64_2e450m2e225m1/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e450m2e225m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // defined in feadd.c; takes two 8-word operands by value and writes out[0..7]
diff --git a/src/Specific/montgomery64_2e450m2e225m1/fenz.c b/src/Specific/montgomery64_2e450m2e225m1/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e450m2e225m1/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // ORs the eight input words together and stores the single result word in out[0].
+{ uint64_t x15 = (x14 | x13);
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20);
+out[0] = x21; // zero exactly when every input word is zero; no branches are taken to compute it
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e450m2e225m1/fenz.h b/src/Specific/montgomery64_2e450m2e225m1/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e450m2e225m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // defined in fenz.c; writes the OR of the eight words to out[0]
diff --git a/src/Specific/montgomery64_2e452m3/feadd.c b/src/Specific/montgomery64_2e452m3/feadd.c
new file mode 100644
index 000000000..d9febb228
--- /dev/null
+++ b/src/Specific/montgomery64_2e452m3/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds the 8-limb operands (x16,x17,x15,x13,x11,x9,x7,x5) and (x30,x31,x29,x27,x25,x23,x21,x19), each listed most-significant limb first, then selects per limb between that sum and the sum minus 2^452 - 3 based on the final borrow.
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33); // least-significant limbs: x33 = low 64 bits of x5 + x19, x34 = carry out
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54); // most-significant limbs; x55 is the carry out of the whole 512-bit addition
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffffdL, &x57); // begin subtracting the constant 2^452 - 3 limb by limb, least significant first; x58 = borrow out
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xf, &x78); // top limb of the constant
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_); // carry the borrow through the addition carry x55; only the final borrow x82 is kept, the value stored in _ is unused
+{ uint64_t x83 = cmovznz(x82, x78, x54); // per-limb choice between the difference (x78) and the plain sum (x54) under x82; NOTE(review): cmovznz comes from liblow.h, presumably returning the last argument when x82 is nonzero - confirm
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // results are stored most-significant limb first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and the 24 nested scopes opened above
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e452m3/feadd.h b/src/Specific/montgomery64_2e452m3/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e452m3/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for feadd; the body in the sibling feadd.c adds two 8-limb operands and writes 8 limbs to out. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e452m3/fenz.c b/src/Specific/montgomery64_2e452m3/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e452m3/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of all eight inputs to out[0], so out[0] == 0 exactly when every input is 0.
+{ uint64_t x15 = (x14 | x13); // start the OR chain with the first two parameters
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20); // x21 now holds the OR of all eight inputs
+out[0] = x21; // the only store; out needs room for one uint64_t
+}}}}}}} // closes the function body and the six nested scopes opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e452m3/fenz.h b/src/Specific/montgomery64_2e452m3/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e452m3/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the body in the sibling fenz.c ORs the eight values into out[0]. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e468m17/feadd.c b/src/Specific/montgomery64_2e468m17/feadd.c
new file mode 100644
index 000000000..4f46ae1b3
--- /dev/null
+++ b/src/Specific/montgomery64_2e468m17/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds the 8-limb operands (x16,x17,x15,x13,x11,x9,x7,x5) and (x30,x31,x29,x27,x25,x23,x21,x19), each listed most-significant limb first, then selects per limb between that sum and the sum minus 2^468 - 17 based on the final borrow.
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33); // least-significant limbs: x33 = low 64 bits of x5 + x19, x34 = carry out
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54); // most-significant limbs; x55 is the carry out of the whole 512-bit addition
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffefL, &x57); // begin subtracting the constant 2^468 - 17 limb by limb, least significant first; x58 = borrow out
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xfffff, &x78); // top limb of the constant
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_); // carry the borrow through the addition carry x55; only the final borrow x82 is kept, the value stored in _ is unused
+{ uint64_t x83 = cmovznz(x82, x78, x54); // per-limb choice between the difference (x78) and the plain sum (x54) under x82; NOTE(review): cmovznz comes from liblow.h, presumably returning the last argument when x82 is nonzero - confirm
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // results are stored most-significant limb first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and the 24 nested scopes opened above
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e468m17/feadd.h b/src/Specific/montgomery64_2e468m17/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e468m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for feadd; the body in the sibling feadd.c adds two 8-limb operands and writes 8 limbs to out. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e468m17/fenz.c b/src/Specific/montgomery64_2e468m17/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e468m17/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of all eight inputs to out[0], so out[0] == 0 exactly when every input is 0.
+{ uint64_t x15 = (x14 | x13); // start the OR chain with the first two parameters
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20); // x21 now holds the OR of all eight inputs
+out[0] = x21; // the only store; out needs room for one uint64_t
+}}}}}}} // closes the function body and the six nested scopes opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e468m17/fenz.h b/src/Specific/montgomery64_2e468m17/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e468m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the body in the sibling fenz.c ORs the eight values into out[0]. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e480m2e240m1/feadd.c b/src/Specific/montgomery64_2e480m2e240m1/feadd.c
new file mode 100644
index 000000000..a8bb326bd
--- /dev/null
+++ b/src/Specific/montgomery64_2e480m2e240m1/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds the 8-limb operands (x16,x17,x15,x13,x11,x9,x7,x5) and (x30,x31,x29,x27,x25,x23,x21,x19), each listed most-significant limb first, then selects per limb between that sum and the sum minus 2^480 - 2^240 - 1 based on the final borrow.
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33); // least-significant limbs: x33 = low 64 bits of x5 + x19, x34 = carry out
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54); // most-significant limbs; x55 is the carry out of the whole 512-bit addition
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57); // begin subtracting the constant 2^480 - 2^240 - 1 limb by limb, least significant first; x58 = borrow out
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xfffeffffffffffffL, &x66); // this limb of the constant has bit 48 cleared, which is the -2^240 term (240 = 3*64 + 48)
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffff, &x78); // top limb of the constant
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_); // carry the borrow through the addition carry x55; only the final borrow x82 is kept, the value stored in _ is unused
+{ uint64_t x83 = cmovznz(x82, x78, x54); // per-limb choice between the difference (x78) and the plain sum (x54) under x82; NOTE(review): cmovznz comes from liblow.h, presumably returning the last argument when x82 is nonzero - confirm
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // results are stored most-significant limb first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and the 24 nested scopes opened above
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e480m2e240m1/feadd.h b/src/Specific/montgomery64_2e480m2e240m1/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e480m2e240m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for feadd; the body in the sibling feadd.c adds two 8-limb operands and writes 8 limbs to out. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e480m2e240m1/fenz.c b/src/Specific/montgomery64_2e480m2e240m1/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e480m2e240m1/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of all eight inputs to out[0], so out[0] == 0 exactly when every input is 0.
+{ uint64_t x15 = (x14 | x13); // start the OR chain with the first two parameters
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20); // x21 now holds the OR of all eight inputs
+out[0] = x21; // the only store; out needs room for one uint64_t
+}}}}}}} // closes the function body and the six nested scopes opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e480m2e240m1/fenz.h b/src/Specific/montgomery64_2e480m2e240m1/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e480m2e240m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the body in the sibling fenz.c ORs the eight values into out[0]. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e488m17/feadd.c b/src/Specific/montgomery64_2e488m17/feadd.c
new file mode 100644
index 000000000..2c13a513b
--- /dev/null
+++ b/src/Specific/montgomery64_2e488m17/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds the 8-limb operands (x16,x17,x15,x13,x11,x9,x7,x5) and (x30,x31,x29,x27,x25,x23,x21,x19), each listed most-significant limb first, then selects per limb between that sum and the sum minus 2^488 - 17 based on the final borrow.
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33); // least-significant limbs: x33 = low 64 bits of x5 + x19, x34 = carry out
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54); // most-significant limbs; x55 is the carry out of the whole 512-bit addition
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffefL, &x57); // begin subtracting the constant 2^488 - 17 limb by limb, least significant first; x58 = borrow out
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffffff, &x78); // top limb of the constant
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_); // carry the borrow through the addition carry x55; only the final borrow x82 is kept, the value stored in _ is unused
+{ uint64_t x83 = cmovznz(x82, x78, x54); // per-limb choice between the difference (x78) and the plain sum (x54) under x82; NOTE(review): cmovznz comes from liblow.h, presumably returning the last argument when x82 is nonzero - confirm
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // results are stored most-significant limb first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and the 24 nested scopes opened above
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e488m17/feadd.h b/src/Specific/montgomery64_2e488m17/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e488m17/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for feadd; the body in the sibling feadd.c adds two 8-limb operands and writes 8 limbs to out. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e488m17/fenz.c b/src/Specific/montgomery64_2e488m17/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e488m17/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of all eight inputs to out[0], so out[0] == 0 exactly when every input is 0.
+{ uint64_t x15 = (x14 | x13); // start the OR chain with the first two parameters
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20); // x21 now holds the OR of all eight inputs
+out[0] = x21; // the only store; out needs room for one uint64_t
+}}}}}}} // closes the function body and the six nested scopes opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e488m17/fenz.h b/src/Specific/montgomery64_2e488m17/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e488m17/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the body in the sibling fenz.c ORs the eight values into out[0]. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e489m21/feadd.c b/src/Specific/montgomery64_2e489m21/feadd.c
new file mode 100644
index 000000000..af6a0c4ec
--- /dev/null
+++ b/src/Specific/montgomery64_2e489m21/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds the 8-limb operands (x16,x17,x15,x13,x11,x9,x7,x5) and (x30,x31,x29,x27,x25,x23,x21,x19), each listed most-significant limb first, then selects per limb between that sum and the sum minus 2^489 - 21 based on the final borrow.
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33); // least-significant limbs: x33 = low 64 bits of x5 + x19, x34 = carry out
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54); // most-significant limbs; x55 is the carry out of the whole 512-bit addition
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffebL, &x57); // begin subtracting the constant 2^489 - 21 limb by limb, least significant first; x58 = borrow out
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x1ffffffffff, &x78); // top limb of the constant
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_); // carry the borrow through the addition carry x55; only the final borrow x82 is kept, the value stored in _ is unused
+{ uint64_t x83 = cmovznz(x82, x78, x54); // per-limb choice between the difference (x78) and the plain sum (x54) under x82; NOTE(review): cmovznz comes from liblow.h, presumably returning the last argument when x82 is nonzero - confirm
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // results are stored most-significant limb first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and the 24 nested scopes opened above
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e489m21/feadd.h b/src/Specific/montgomery64_2e489m21/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e489m21/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for feadd; the body in the sibling feadd.c adds two 8-limb operands and writes 8 limbs to out. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e489m21/fenz.c b/src/Specific/montgomery64_2e489m21/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e489m21/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of all eight inputs to out[0], so out[0] == 0 exactly when every input is 0.
+{ uint64_t x15 = (x14 | x13); // start the OR chain with the first two parameters
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20); // x21 now holds the OR of all eight inputs
+out[0] = x21; // the only store; out needs room for one uint64_t
+}}}}}}} // closes the function body and the six nested scopes opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e489m21/fenz.h b/src/Specific/montgomery64_2e489m21/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e489m21/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the body in the sibling fenz.c ORs the eight values into out[0]. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e495m31/feadd.c b/src/Specific/montgomery64_2e495m31/feadd.c
new file mode 100644
index 000000000..25392f503
--- /dev/null
+++ b/src/Specific/montgomery64_2e495m31/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds the 8-limb operands (x16,x17,x15,x13,x11,x9,x7,x5) and (x30,x31,x29,x27,x25,x23,x21,x19), each listed most-significant limb first, then selects per limb between that sum and the sum minus 2^495 - 31 based on the final borrow.
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33); // least-significant limbs: x33 = low 64 bits of x5 + x19, x34 = carry out
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54); // most-significant limbs; x55 is the carry out of the whole 512-bit addition
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffe1L, &x57); // begin subtracting the constant 2^495 - 31 limb by limb, least significant first; x58 = borrow out
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffff, &x78); // top limb of the constant
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_); // carry the borrow through the addition carry x55; only the final borrow x82 is kept, the value stored in _ is unused
+{ uint64_t x83 = cmovznz(x82, x78, x54); // per-limb choice between the difference (x78) and the plain sum (x54) under x82; NOTE(review): cmovznz comes from liblow.h, presumably returning the last argument when x82 is nonzero - confirm
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // results are stored most-significant limb first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and the 24 nested scopes opened above
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e495m31/feadd.h b/src/Specific/montgomery64_2e495m31/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e495m31/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for feadd; the body in the sibling feadd.c adds two 8-limb operands and writes 8 limbs to out. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e495m31/fenz.c b/src/Specific/montgomery64_2e495m31/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e495m31/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes the bitwise OR of all eight inputs to out[0], so out[0] == 0 exactly when every input is 0.
+{ uint64_t x15 = (x14 | x13); // start the OR chain with the first two parameters
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20); // x21 now holds the OR of all eight inputs
+out[0] = x21; // the only store; out needs room for one uint64_t
+}}}}}}} // closes the function body and the six nested scopes opened above
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e495m31/fenz.h b/src/Specific/montgomery64_2e495m31/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e495m31/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fenz; the body in the sibling fenz.c ORs the eight values into out[0]. NOTE(review): always_inline on a declaration without a visible body may not be honored by the compiler - confirm how callers consume this.
diff --git a/src/Specific/montgomery64_2e510m290x2e496m1/feadd.c b/src/Specific/montgomery64_2e510m290x2e496m1/feadd.c
new file mode 100644
index 000000000..284a47d81
--- /dev/null
+++ b/src/Specific/montgomery64_2e510m290x2e496m1/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Adds the 8-limb operands (x16,x17,x15,x13,x11,x9,x7,x5) and (x30,x31,x29,x27,x25,x23,x21,x19), each listed most-significant limb first, then selects per limb between that sum and the sum minus 2^510 - 290*2^496 - 1 based on the final borrow.
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33); // least-significant limbs: x33 = low 64 bits of x5 + x19, x34 = carry out
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54); // most-significant limbs; x55 is the carry out of the whole 512-bit addition
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57); // begin subtracting the constant 2^510 - 290*2^496 - 1 limb by limb, least significant first; x58 = borrow out
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x3eddffffffffffff, &x78); // top limb of the constant: 2^62 - 290*2^48 - 1
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_); // carry the borrow through the addition carry x55; only the final borrow x82 is kept, the value stored in _ is unused
+{ uint64_t x83 = cmovznz(x82, x78, x54); // per-limb choice between the difference (x78) and the plain sum (x54) under x82; NOTE(review): cmovznz comes from liblow.h, presumably returning the last argument when x82 is nonzero - confirm
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83; // results are stored most-significant limb first
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90; // least-significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the function body and the 24 nested scopes opened above
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e510m290x2e496m1/feadd.h b/src/Specific/montgomery64_2e510m290x2e496m1/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e510m290x2e496m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/montgomery64_2e510m290x2e496m1/fenz.c b/src/Specific/montgomery64_2e510m290x2e496m1/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e510m290x2e496m1/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x15 = (x14 | x13);
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20);
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e510m290x2e496m1/fenz.h b/src/Specific/montgomery64_2e510m290x2e496m1/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e510m290x2e496m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e511m187/feadd.c b/src/Specific/montgomery64_2e511m187/feadd.c
new file mode 100644
index 000000000..77d131494
--- /dev/null
+++ b/src/Specific/montgomery64_2e511m187/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffff45L, &x57);
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffffffffL, &x78);
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+{ uint64_t x83 = cmovznz(x82, x78, x54);
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83;
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e511m187/feadd.h b/src/Specific/montgomery64_2e511m187/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e511m187/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/montgomery64_2e511m187/fenz.c b/src/Specific/montgomery64_2e511m187/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e511m187/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x15 = (x14 | x13);
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20);
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e511m187/fenz.h b/src/Specific/montgomery64_2e511m187/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e511m187/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e511m481/feadd.c b/src/Specific/montgomery64_2e511m481/feadd.c
new file mode 100644
index 000000000..b26d4482c
--- /dev/null
+++ b/src/Specific/montgomery64_2e511m481/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffe1fL, &x57);
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0x7fffffffffffffffL, &x78);
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+{ uint64_t x83 = cmovznz(x82, x78, x54);
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83;
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e511m481/feadd.h b/src/Specific/montgomery64_2e511m481/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e511m481/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/montgomery64_2e511m481/fenz.c b/src/Specific/montgomery64_2e511m481/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e511m481/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x15 = (x14 | x13);
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20);
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e511m481/fenz.h b/src/Specific/montgomery64_2e511m481/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e511m481/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e512m491x2e496m1/feadd.c b/src/Specific/montgomery64_2e512m491x2e496m1/feadd.c
new file mode 100644
index 000000000..4421e6ab5
--- /dev/null
+++ b/src/Specific/montgomery64_2e512m491x2e496m1/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xffffffffffffffffL, &x57);
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xfe14ffffffffffffL, &x78);
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+{ uint64_t x83 = cmovznz(x82, x78, x54);
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83;
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e512m491x2e496m1/feadd.h b/src/Specific/montgomery64_2e512m491x2e496m1/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e512m491x2e496m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/montgomery64_2e512m491x2e496m1/fenz.c b/src/Specific/montgomery64_2e512m491x2e496m1/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e512m491x2e496m1/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x15 = (x14 | x13);
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20);
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e512m491x2e496m1/fenz.h b/src/Specific/montgomery64_2e512m491x2e496m1/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e512m491x2e496m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e512m569/feadd.c b/src/Specific/montgomery64_2e512m569/feadd.c
new file mode 100644
index 000000000..d6e5329b2
--- /dev/null
+++ b/src/Specific/montgomery64_2e512m569/feadd.c
@@ -0,0 +1,54 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
+{ uint64_t x33; uint8_t x34 = _addcarryx_u64(0x0, x5, x19, &x33);
+{ uint64_t x36; uint8_t x37 = _addcarryx_u64(x34, x7, x21, &x36);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u64(x37, x9, x23, &x39);
+{ uint64_t x42; uint8_t x43 = _addcarryx_u64(x40, x11, x25, &x42);
+{ uint64_t x45; uint8_t x46 = _addcarryx_u64(x43, x13, x27, &x45);
+{ uint64_t x48; uint8_t x49 = _addcarryx_u64(x46, x15, x29, &x48);
+{ uint64_t x51; uint8_t x52 = _addcarryx_u64(x49, x17, x31, &x51);
+{ uint64_t x54; uint8_t x55 = _addcarryx_u64(x52, x16, x30, &x54);
+{ uint64_t x57; uint8_t x58 = _subborrow_u64(0x0, x33, 0xfffffffffffffdc7L, &x57);
+{ uint64_t x60; uint8_t x61 = _subborrow_u64(x58, x36, 0xffffffffffffffffL, &x60);
+{ uint64_t x63; uint8_t x64 = _subborrow_u64(x61, x39, 0xffffffffffffffffL, &x63);
+{ uint64_t x66; uint8_t x67 = _subborrow_u64(x64, x42, 0xffffffffffffffffL, &x66);
+{ uint64_t x69; uint8_t x70 = _subborrow_u64(x67, x45, 0xffffffffffffffffL, &x69);
+{ uint64_t x72; uint8_t x73 = _subborrow_u64(x70, x48, 0xffffffffffffffffL, &x72);
+{ uint64_t x75; uint8_t x76 = _subborrow_u64(x73, x51, 0xffffffffffffffffL, &x75);
+{ uint64_t x78; uint8_t x79 = _subborrow_u64(x76, x54, 0xffffffffffffffffL, &x78);
+{ uint64_t _; uint8_t x82 = _subborrow_u64(x79, x55, 0x0, &_);
+{ uint64_t x83 = cmovznz(x82, x78, x54);
+{ uint64_t x84 = cmovznz(x82, x75, x51);
+{ uint64_t x85 = cmovznz(x82, x72, x48);
+{ uint64_t x86 = cmovznz(x82, x69, x45);
+{ uint64_t x87 = cmovznz(x82, x66, x42);
+{ uint64_t x88 = cmovznz(x82, x63, x39);
+{ uint64_t x89 = cmovznz(x82, x60, x36);
+{ uint64_t x90 = cmovznz(x82, x57, x33);
+out[0] = x83;
+out[1] = x84;
+out[2] = x85;
+out[3] = x86;
+out[4] = x87;
+out[5] = x88;
+out[6] = x89;
+out[7] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/montgomery64_2e512m569/feadd.h b/src/Specific/montgomery64_2e512m569/feadd.h
new file mode 100644
index 000000000..6156ca61a
--- /dev/null
+++ b/src/Specific/montgomery64_2e512m569/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/montgomery64_2e512m569/fenz.c b/src/Specific/montgomery64_2e512m569/fenz.c
new file mode 100644
index 000000000..72878ca19
--- /dev/null
+++ b/src/Specific/montgomery64_2e512m569/fenz.c
@@ -0,0 +1,29 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x15 = (x14 | x13);
+{ uint64_t x16 = (x12 | x15);
+{ uint64_t x17 = (x10 | x16);
+{ uint64_t x18 = (x8 | x17);
+{ uint64_t x19 = (x6 | x18);
+{ uint64_t x20 = (x4 | x19);
+{ uint64_t x21 = (x2 | x20);
+out[0] = x21;
+}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e512m569/fenz.h b/src/Specific/montgomery64_2e512m569/fenz.h
new file mode 100644
index 000000000..4f262e79b
--- /dev/null
+++ b/src/Specific/montgomery64_2e512m569/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/montgomery64_2e521m1/feadd.c b/src/Specific/montgomery64_2e521m1/feadd.c
new file mode 100644
index 000000000..2a8646049
--- /dev/null
+++ b/src/Specific/montgomery64_2e521m1/feadd.c
@@ -0,0 +1,58 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "feadd.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
+{ uint64_t x37; uint8_t x38 = _addcarryx_u64(0x0, x5, x21, &x37);
+{ uint64_t x40; uint8_t x41 = _addcarryx_u64(x38, x7, x23, &x40);
+{ uint64_t x43; uint8_t x44 = _addcarryx_u64(x41, x9, x25, &x43);
+{ uint64_t x46; uint8_t x47 = _addcarryx_u64(x44, x11, x27, &x46);
+{ uint64_t x49; uint8_t x50 = _addcarryx_u64(x47, x13, x29, &x49);
+{ uint64_t x52; uint8_t x53 = _addcarryx_u64(x50, x15, x31, &x52);
+{ uint64_t x55; uint8_t x56 = _addcarryx_u64(x53, x17, x33, &x55);
+{ uint64_t x58; uint8_t x59 = _addcarryx_u64(x56, x19, x35, &x58);
+{ uint64_t x61; uint8_t x62 = _addcarryx_u64(x59, x18, x34, &x61);
+{ uint64_t x64; uint8_t x65 = _subborrow_u64(0x0, x37, 0xffffffffffffffffL, &x64);
+{ uint64_t x67; uint8_t x68 = _subborrow_u64(x65, x40, 0xffffffffffffffffL, &x67);
+{ uint64_t x70; uint8_t x71 = _subborrow_u64(x68, x43, 0xffffffffffffffffL, &x70);
+{ uint64_t x73; uint8_t x74 = _subborrow_u64(x71, x46, 0xffffffffffffffffL, &x73);
+{ uint64_t x76; uint8_t x77 = _subborrow_u64(x74, x49, 0xffffffffffffffffL, &x76);
+{ uint64_t x79; uint8_t x80 = _subborrow_u64(x77, x52, 0xffffffffffffffffL, &x79);
+{ uint64_t x82; uint8_t x83 = _subborrow_u64(x80, x55, 0xffffffffffffffffL, &x82);
+{ uint64_t x85; uint8_t x86 = _subborrow_u64(x83, x58, 0xffffffffffffffffL, &x85);
+{ uint64_t x88; uint8_t x89 = _subborrow_u64(x86, x61, 0x1ff, &x88);
+{ uint64_t _; uint8_t x92 = _subborrow_u64(x89, x62, 0x0, &_);
+{ uint64_t x93 = cmovznz(x92, x88, x61);
+{ uint64_t x94 = cmovznz(x92, x85, x58);
+{ uint64_t x95 = cmovznz(x92, x82, x55);
+{ uint64_t x96 = cmovznz(x92, x79, x52);
+{ uint64_t x97 = cmovznz(x92, x76, x49);
+{ uint64_t x98 = cmovznz(x92, x73, x46);
+{ uint64_t x99 = cmovznz(x92, x70, x43);
+{ uint64_t x100 = cmovznz(x92, x67, x40);
+{ uint64_t x101 = cmovznz(x92, x64, x37);
+out[0] = x93;
+out[1] = x94;
+out[2] = x95;
+out[3] = x96;
+out[4] = x97;
+out[5] = x98;
+out[6] = x99;
+out[7] = x100;
+out[8] = x101;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/montgomery64_2e521m1/feadd.h b/src/Specific/montgomery64_2e521m1/feadd.h
new file mode 100644
index 000000000..a1ada01c4
--- /dev/null
+++ b/src/Specific/montgomery64_2e521m1/feadd.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline feadd(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21);
diff --git a/src/Specific/montgomery64_2e521m1/fenz.c b/src/Specific/montgomery64_2e521m1/fenz.c
new file mode 100644
index 000000000..d49e22dda
--- /dev/null
+++ b/src/Specific/montgomery64_2e521m1/fenz.c
@@ -0,0 +1,30 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fenz.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x17 = (x16 | x15);
+{ uint64_t x18 = (x14 | x17);
+{ uint64_t x19 = (x12 | x18);
+{ uint64_t x20 = (x10 | x19);
+{ uint64_t x21 = (x8 | x20);
+{ uint64_t x22 = (x6 | x21);
+{ uint64_t x23 = (x4 | x22);
+{ uint64_t x24 = (x2 | x23);
+out[0] = x24;
+}}}}}}}}
+// caller: uint64_t out[1];
diff --git a/src/Specific/montgomery64_2e521m1/fenz.h b/src/Specific/montgomery64_2e521m1/fenz.h
new file mode 100644
index 000000000..47fa749c3
--- /dev/null
+++ b/src/Specific/montgomery64_2e521m1/fenz.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fenz(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e127m1/femul.c b/src/Specific/solinas32_2e127m1/femul.c
new file mode 100644
index 000000000..8f793432e
--- /dev/null
+++ b/src/Specific/solinas32_2e127m1/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((0x2 * ((uint64_t)x11 * x19)) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
+{ uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((0x2 * ((uint64_t)x9 * x19)) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + ((uint64_t)x12 * x22));
+{ uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23)));
+{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21))));
+{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19)))));
+{ uint64_t x29 = (((uint64_t)x5 * x15) + ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17)))))));
+{ uint32_t x30 = (uint32_t) (x29 >> 0x16);
+{ uint32_t x31 = ((uint32_t)x29 & 0x3fffff);
+{ uint64_t x32 = (x30 + x28);
+{ uint32_t x33 = (uint32_t) (x32 >> 0x15);
+{ uint32_t x34 = ((uint32_t)x32 & 0x1fffff);
+{ uint64_t x35 = (x33 + x27);
+{ uint32_t x36 = (uint32_t) (x35 >> 0x15);
+{ uint32_t x37 = ((uint32_t)x35 & 0x1fffff);
+{ uint64_t x38 = (x36 + x26);
+{ uint32_t x39 = (uint32_t) (x38 >> 0x15);
+{ uint32_t x40 = ((uint32_t)x38 & 0x1fffff);
+{ uint64_t x41 = (x39 + x25);
+{ uint32_t x42 = (uint32_t) (x41 >> 0x15);
+{ uint32_t x43 = ((uint32_t)x41 & 0x1fffff);
+{ uint64_t x44 = (x42 + x24);
+{ uint32_t x45 = (uint32_t) (x44 >> 0x15);
+{ uint32_t x46 = ((uint32_t)x44 & 0x1fffff);
+{ uint32_t x47 = (x31 + x45);
+{ uint32_t x48 = (x47 >> 0x16);
+{ uint32_t x49 = (x47 & 0x3fffff);
+{ uint32_t x50 = (x48 + x34);
+{ uint32_t x51 = (x50 >> 0x15);
+{ uint32_t x52 = (x50 & 0x1fffff);
+out[0] = x46;
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37;
+out[4] = x52;
+out[5] = x49;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e127m1/femul.h b/src/Specific/solinas32_2e127m1/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas32_2e127m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/solinas32_2e127m1/fesquare.c b/src/Specific/solinas32_2e127m1/fesquare.c
new file mode 100644
index 000000000..8657902e8
--- /dev/null
+++ b/src/Specific/solinas32_2e127m1/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squaring routine: every product below pairs two of the six inputs; six limbs are written to out[0..5]. Directory name suggests arithmetic mod 2^127-1 -- TODO confirm.
+{ uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2)))))); // x11..x16: 64-bit sums of pairwise input products, some doubled via 0x2
+{ uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + ((uint64_t)x9 * x9));
+{ uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10)));
+{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8))));
+{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6)))));
+{ uint64_t x16 = (((uint64_t)x2 * x2) + ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4)))))));
+{ uint32_t x17 = (uint32_t) (x16 >> 0x16); // carry chain starts at x16: split at bit 22. NOTE(review): the uint32_t cast assumes the shifted value fits in 32 bits; that depends on input bounds not visible here.
+{ uint32_t x18 = ((uint32_t)x16 & 0x3fffff);
+{ uint64_t x19 = (x17 + x15); // each carry is added into the next sum, which is then split at bit 21
+{ uint32_t x20 = (uint32_t) (x19 >> 0x15);
+{ uint32_t x21 = ((uint32_t)x19 & 0x1fffff);
+{ uint64_t x22 = (x20 + x14);
+{ uint32_t x23 = (uint32_t) (x22 >> 0x15);
+{ uint32_t x24 = ((uint32_t)x22 & 0x1fffff);
+{ uint64_t x25 = (x23 + x13);
+{ uint32_t x26 = (uint32_t) (x25 >> 0x15);
+{ uint32_t x27 = ((uint32_t)x25 & 0x1fffff);
+{ uint64_t x28 = (x26 + x12);
+{ uint32_t x29 = (uint32_t) (x28 >> 0x15);
+{ uint32_t x30 = ((uint32_t)x28 & 0x1fffff);
+{ uint64_t x31 = (x29 + x11);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x15);
+{ uint32_t x33 = ((uint32_t)x31 & 0x1fffff);
+{ uint32_t x34 = (x18 + x32); // wrap-around: the carry out of the last sum is added back onto the first limb (no multiplier in this variant)
+{ uint32_t x35 = (x34 >> 0x16); // short second carry pass over the first two limbs
+{ uint32_t x36 = (x34 & 0x3fffff);
+{ uint32_t x37 = (x35 + x21);
+{ uint32_t x38 = (x37 >> 0x15);
+{ uint32_t x39 = (x37 & 0x1fffff);
+out[0] = x33; // out[0] holds the limb from the last-carried sum (x11); out[5] the limb from the first (x16)
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // final carry is added without a further mask, so this limb may exceed 21 bits -- NOTE(review): presumably intended; confirm against the generator's bounds
+out[4] = x39;
+out[5] = x36;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e127m1/fesquare.h b/src/Specific/solinas32_2e127m1/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas32_2e127m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare: an output pointer followed by six uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e127m1/freeze.c b/src/Specific/solinas32_2e127m1/freeze.c
new file mode 100644
index 000000000..d8b2cd358
--- /dev/null
+++ b/src/Specific/solinas32_2e127m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace and declarations appear on the right-hand side of assignments. It looks like code-generator output that was never lowered to C; regenerate rather than hand-edit.
+out[0] = uint32_t x12; // a declaration used as an expression; will not compile
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // "Op Syntax.SubWithGetBorrow ..." is generator syntax, not a C expression
+out[2] = x2;
+out[3] = 0x3fffff;; // doubled semicolon in the generated text
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e127m1/freeze.h b/src/Specific/solinas32_2e127m1/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas32_2e127m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze: an output pointer followed by six uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e129m25/femul.c b/src/Specific/solinas32_2e129m25/femul.c
new file mode 100644
index 000000000..03718ede2
--- /dev/null
+++ b/src/Specific/solinas32_2e129m25/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Multiplication routine: every product pairs one of (x12,x13,x11,x9,x7,x5) with one of (x22,x23,x21,x19,x17,x15); six limbs are written to out[0..5]. Directory name suggests arithmetic mod 2^129-25 -- TODO confirm.
+{ uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15)))))); // x24..x29: 64-bit sums of cross products; some terms are doubled (0x2) and some groups are scaled by 0x19
+{ uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + (((uint64_t)x9 * x19) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + (0x19 * (0x2 * ((uint64_t)x12 * x22))));
+{ uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x19 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
+{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x19 * ((0x2 * ((uint64_t)x11 * x22)) + (((uint64_t)x13 * x23) + (0x2 * ((uint64_t)x12 * x21))))));
+{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x19 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x19 * ((0x2 * ((uint64_t)x7 * x22)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + (((uint64_t)x13 * x19) + (0x2 * ((uint64_t)x12 * x17))))))));
+{ uint32_t x30 = (uint32_t) (x29 >> 0x16); // carry chain starts at x29; split points alternate between bit 22 and bit 21. NOTE(review): the uint32_t casts assume each shifted value fits in 32 bits; that depends on input bounds not visible here.
+{ uint32_t x31 = ((uint32_t)x29 & 0x3fffff);
+{ uint64_t x32 = (x30 + x28);
+{ uint64_t x33 = (x32 >> 0x15); // this carry is kept in 64 bits, unlike the others in the chain
+{ uint32_t x34 = ((uint32_t)x32 & 0x1fffff);
+{ uint64_t x35 = (x33 + x27);
+{ uint32_t x36 = (uint32_t) (x35 >> 0x16);
+{ uint32_t x37 = ((uint32_t)x35 & 0x3fffff);
+{ uint64_t x38 = (x36 + x26);
+{ uint32_t x39 = (uint32_t) (x38 >> 0x15);
+{ uint32_t x40 = ((uint32_t)x38 & 0x1fffff);
+{ uint64_t x41 = (x39 + x25);
+{ uint32_t x42 = (uint32_t) (x41 >> 0x16);
+{ uint32_t x43 = ((uint32_t)x41 & 0x3fffff);
+{ uint64_t x44 = (x42 + x24);
+{ uint32_t x45 = (uint32_t) (x44 >> 0x15);
+{ uint32_t x46 = ((uint32_t)x44 & 0x1fffff);
+{ uint64_t x47 = (x31 + ((uint64_t)0x19 * x45)); // wrap-around: the carry out of the last sum is scaled by 0x19 and added back onto the first limb
+{ uint32_t x48 = (uint32_t) (x47 >> 0x16); // short second carry pass over the first two limbs
+{ uint32_t x49 = ((uint32_t)x47 & 0x3fffff);
+{ uint32_t x50 = (x48 + x34);
+{ uint32_t x51 = (x50 >> 0x15);
+{ uint32_t x52 = (x50 & 0x1fffff);
+out[0] = x46; // out[0] holds the limb from the last-carried sum (x24); out[5] the limb from the first (x29)
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37; // final carry is added without a further mask, so this limb may exceed 22 bits -- NOTE(review): presumably intended; confirm against the generator's bounds
+out[4] = x52;
+out[5] = x49;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e129m25/femul.h b/src/Specific/solinas32_2e129m25/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas32_2e129m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype for femul: an output pointer followed by twelve uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e129m25/fesquare.c b/src/Specific/solinas32_2e129m25/fesquare.c
new file mode 100644
index 000000000..a54caf2f1
--- /dev/null
+++ b/src/Specific/solinas32_2e129m25/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squaring routine: every product below pairs two of the six inputs; six limbs are written to out[0..5]. Directory name suggests arithmetic mod 2^129-25 -- TODO confirm.
+{ uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2)))))); // x11..x16: 64-bit sums of pairwise input products; some terms are doubled (0x2) and some groups are scaled by 0x19
+{ uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x19 * (0x2 * ((uint64_t)x9 * x9))));
+{ uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
+{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * ((0x2 * ((uint64_t)x8 * x9)) + (((uint64_t)x10 * x10) + (0x2 * ((uint64_t)x9 * x8))))));
+{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x9)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + (0x2 * ((uint64_t)x9 * x4))))))));
+{ uint32_t x17 = (uint32_t) (x16 >> 0x16); // carry chain starts at x16; split points alternate between bit 22 and bit 21. NOTE(review): the uint32_t casts assume each shifted value fits in 32 bits; that depends on input bounds not visible here.
+{ uint32_t x18 = ((uint32_t)x16 & 0x3fffff);
+{ uint64_t x19 = (x17 + x15);
+{ uint64_t x20 = (x19 >> 0x15); // this carry is kept in 64 bits, unlike the others in the chain
+{ uint32_t x21 = ((uint32_t)x19 & 0x1fffff);
+{ uint64_t x22 = (x20 + x14);
+{ uint32_t x23 = (uint32_t) (x22 >> 0x16);
+{ uint32_t x24 = ((uint32_t)x22 & 0x3fffff);
+{ uint64_t x25 = (x23 + x13);
+{ uint32_t x26 = (uint32_t) (x25 >> 0x15);
+{ uint32_t x27 = ((uint32_t)x25 & 0x1fffff);
+{ uint64_t x28 = (x26 + x12);
+{ uint32_t x29 = (uint32_t) (x28 >> 0x16);
+{ uint32_t x30 = ((uint32_t)x28 & 0x3fffff);
+{ uint64_t x31 = (x29 + x11);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x15);
+{ uint32_t x33 = ((uint32_t)x31 & 0x1fffff);
+{ uint64_t x34 = (x18 + ((uint64_t)0x19 * x32)); // wrap-around: the carry out of the last sum is scaled by 0x19 and added back onto the first limb
+{ uint32_t x35 = (uint32_t) (x34 >> 0x16); // short second carry pass over the first two limbs
+{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+{ uint32_t x37 = (x35 + x21);
+{ uint32_t x38 = (x37 >> 0x15);
+{ uint32_t x39 = (x37 & 0x1fffff);
+out[0] = x33; // out[0] holds the limb from the last-carried sum (x11); out[5] the limb from the first (x16)
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // final carry is added without a further mask, so this limb may exceed 22 bits -- NOTE(review): presumably intended; confirm against the generator's bounds
+out[4] = x39;
+out[5] = x36;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e129m25/fesquare.h b/src/Specific/solinas32_2e129m25/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas32_2e129m25/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare: an output pointer followed by six uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e129m25/freeze.c b/src/Specific/solinas32_2e129m25/freeze.c
new file mode 100644
index 000000000..386e29800
--- /dev/null
+++ b/src/Specific/solinas32_2e129m25/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace and declarations appear on the right-hand side of assignments. It looks like code-generator output that was never lowered to C; regenerate rather than hand-edit.
+out[0] = uint32_t x12; // a declaration used as an expression; will not compile
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // "Op Syntax.SubWithGetBorrow ..." is generator syntax, not a C expression
+out[2] = x2;
+out[3] = 0x3fffe7;; // doubled semicolon in the generated text
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e129m25/freeze.h b/src/Specific/solinas32_2e129m25/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas32_2e129m25/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze: an output pointer followed by six uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e130m5/femul.c b/src/Specific/solinas32_2e130m5/femul.c
new file mode 100644
index 000000000..db0994a56
--- /dev/null
+++ b/src/Specific/solinas32_2e130m5/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Multiplication routine: every product pairs one of (x10,x11,x9,x7,x5) with one of (x18,x19,x17,x15,x13); five limbs are written to out[0..4]. Directory name suggests arithmetic mod 2^130-5 -- TODO confirm.
+{ uint64_t x20 = (((uint64_t)x5 * x18) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + (((uint64_t)x11 * x15) + ((uint64_t)x10 * x13))))); // x20..x24: 64-bit sums of cross products; some groups are scaled by 0x5
+{ uint64_t x21 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + (((uint64_t)x9 * x15) + ((uint64_t)x11 * x13)))) + (0x5 * ((uint64_t)x10 * x18)));
+{ uint64_t x22 = ((((uint64_t)x5 * x17) + (((uint64_t)x7 * x15) + ((uint64_t)x9 * x13))) + (0x5 * (((uint64_t)x11 * x18) + ((uint64_t)x10 * x19))));
+{ uint64_t x23 = ((((uint64_t)x5 * x15) + ((uint64_t)x7 * x13)) + (0x5 * (((uint64_t)x9 * x18) + (((uint64_t)x11 * x19) + ((uint64_t)x10 * x17)))));
+{ uint64_t x24 = (((uint64_t)x5 * x13) + (0x5 * (((uint64_t)x7 * x18) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x10 * x15))))));
+{ uint64_t x25 = (x24 >> 0x1a); // carry chain starts at x24; every sum is split at bit 26 (mask 0x3ffffff), carries kept in 64 bits
+{ uint32_t x26 = ((uint32_t)x24 & 0x3ffffff);
+{ uint64_t x27 = (x25 + x23);
+{ uint64_t x28 = (x27 >> 0x1a);
+{ uint32_t x29 = ((uint32_t)x27 & 0x3ffffff);
+{ uint64_t x30 = (x28 + x22);
+{ uint64_t x31 = (x30 >> 0x1a);
+{ uint32_t x32 = ((uint32_t)x30 & 0x3ffffff);
+{ uint64_t x33 = (x31 + x21);
+{ uint64_t x34 = (x33 >> 0x1a);
+{ uint32_t x35 = ((uint32_t)x33 & 0x3ffffff);
+{ uint64_t x36 = (x34 + x20);
+{ uint32_t x37 = (uint32_t) (x36 >> 0x1a); // NOTE(review): this cast assumes the last carry fits in 32 bits; that depends on input bounds not visible here
+{ uint32_t x38 = ((uint32_t)x36 & 0x3ffffff);
+{ uint64_t x39 = (x26 + ((uint64_t)0x5 * x37)); // wrap-around: the carry out of the last sum is scaled by 0x5 and added back onto the first limb
+{ uint32_t x40 = (uint32_t) (x39 >> 0x1a); // short second carry pass over the first two limbs
+{ uint32_t x41 = ((uint32_t)x39 & 0x3ffffff);
+{ uint32_t x42 = (x40 + x29);
+{ uint32_t x43 = (x42 >> 0x1a);
+{ uint32_t x44 = (x42 & 0x3ffffff);
+out[0] = x38; // out[0] holds the limb from the last-carried sum (x20); out[4] the limb from the first (x24)
+out[1] = x35;
+out[2] = x43 + x32; // final carry is added without a further mask, so this limb may exceed 26 bits -- NOTE(review): presumably intended; confirm against the generator's bounds
+out[3] = x44;
+out[4] = x41;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas32_2e130m5/femul.h b/src/Specific/solinas32_2e130m5/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas32_2e130m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype for femul: an output pointer followed by ten uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e130m5/fesquare.c b/src/Specific/solinas32_2e130m5/fesquare.c
new file mode 100644
index 000000000..d54648358
--- /dev/null
+++ b/src/Specific/solinas32_2e130m5/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squaring routine: every product below pairs two of the five inputs; five limbs are written to out[0..4]. Directory name suggests arithmetic mod 2^130-5 -- TODO confirm.
+{ uint64_t x9 = (((uint64_t)x2 * x7) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x7 * x2))))); // x9..x13: 64-bit sums of pairwise input products; some groups are scaled by 0x5
+{ uint64_t x10 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x5 * ((uint64_t)x7 * x7)));
+{ uint64_t x11 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x7) + ((uint64_t)x7 * x8))));
+{ uint64_t x12 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x7) + (((uint64_t)x8 * x8) + ((uint64_t)x7 * x6)))));
+{ uint64_t x13 = (((uint64_t)x2 * x2) + (0x5 * (((uint64_t)x4 * x7) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((uint64_t)x7 * x4))))));
+{ uint64_t x14 = (x13 >> 0x1a); // carry chain starts at x13; every sum is split at bit 26 (mask 0x3ffffff), carries kept in 64 bits
+{ uint32_t x15 = ((uint32_t)x13 & 0x3ffffff);
+{ uint64_t x16 = (x14 + x12);
+{ uint64_t x17 = (x16 >> 0x1a);
+{ uint32_t x18 = ((uint32_t)x16 & 0x3ffffff);
+{ uint64_t x19 = (x17 + x11);
+{ uint64_t x20 = (x19 >> 0x1a);
+{ uint32_t x21 = ((uint32_t)x19 & 0x3ffffff);
+{ uint64_t x22 = (x20 + x10);
+{ uint64_t x23 = (x22 >> 0x1a);
+{ uint32_t x24 = ((uint32_t)x22 & 0x3ffffff);
+{ uint64_t x25 = (x23 + x9);
+{ uint32_t x26 = (uint32_t) (x25 >> 0x1a); // NOTE(review): this cast assumes the last carry fits in 32 bits; that depends on input bounds not visible here
+{ uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
+{ uint64_t x28 = (x15 + ((uint64_t)0x5 * x26)); // wrap-around: the carry out of the last sum is scaled by 0x5 and added back onto the first limb
+{ uint32_t x29 = (uint32_t) (x28 >> 0x1a); // short second carry pass over the first two limbs
+{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
+{ uint32_t x31 = (x29 + x18);
+{ uint32_t x32 = (x31 >> 0x1a);
+{ uint32_t x33 = (x31 & 0x3ffffff);
+out[0] = x27; // out[0] holds the limb from the last-carried sum (x9); out[4] the limb from the first (x13)
+out[1] = x24;
+out[2] = x32 + x21; // final carry is added without a further mask, so this limb may exceed 26 bits -- NOTE(review): presumably intended; confirm against the generator's bounds
+out[3] = x33;
+out[4] = x30;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas32_2e130m5/fesquare.h b/src/Specific/solinas32_2e130m5/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas32_2e130m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare: an output pointer followed by five uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e130m5/freeze.c b/src/Specific/solinas32_2e130m5/freeze.c
new file mode 100644
index 000000000..38ccd3bc8
--- /dev/null
+++ b/src/Specific/solinas32_2e130m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace and declarations appear on the right-hand side of assignments. It looks like code-generator output that was never lowered to C; regenerate rather than hand-edit.
+out[0] = uint32_t x10; // a declaration used as an expression; will not compile
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // "Op Syntax.SubWithGetBorrow ..." is generator syntax, not a C expression
+out[2] = x2;
+out[3] = 0x3fffffb;; // doubled semicolon in the generated text
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e130m5/freeze.h b/src/Specific/solinas32_2e130m5/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas32_2e130m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze: an output pointer followed by five uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e137m13/femul.c b/src/Specific/solinas32_2e137m13/femul.c
new file mode 100644
index 000000000..67bc63d33
--- /dev/null
+++ b/src/Specific/solinas32_2e137m13/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Multiplication routine: every product pairs one of (x16,x17,x15,x13,x11,x9,x7,x5) with one of (x30,x31,x29,x27,x25,x23,x21,x19); eight limbs are written to out[0..7]. Directory name suggests arithmetic mod 2^137-13 -- TODO confirm.
+{ uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19)))))))); // x32..x39: 64-bit sums of cross products; some terms are doubled (0x2) and some groups are scaled by 0xd
+{ uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0xd * ((uint64_t)x16 * x30)));
+{ uint64_t x34 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((0x2 * ((uint64_t)x13 * x21)) + ((uint64_t)x15 * x19)))))) + (0xd * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
+{ uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0xd * (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29)))));
+{ uint64_t x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) + (0xd * (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27))))));
+{ uint64_t x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) + (0xd * (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
+{ uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0xd * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
+{ uint64_t x39 = (((uint64_t)x5 * x19) + (0xd * ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21))))))))));
+{ uint32_t x40 = (uint32_t) (x39 >> 0x12); // carry chain starts at x39: split at bit 18 (mask 0x3ffff). NOTE(review): the uint32_t casts assume each shifted value fits in 32 bits; that depends on input bounds not visible here.
+{ uint32_t x41 = ((uint32_t)x39 & 0x3ffff);
+{ uint64_t x42 = (x40 + x38); // each carry is added into the next sum, which is then split at bit 17 (mask 0x1ffff)
+{ uint32_t x43 = (uint32_t) (x42 >> 0x11);
+{ uint32_t x44 = ((uint32_t)x42 & 0x1ffff);
+{ uint64_t x45 = (x43 + x37);
+{ uint32_t x46 = (uint32_t) (x45 >> 0x11);
+{ uint32_t x47 = ((uint32_t)x45 & 0x1ffff);
+{ uint64_t x48 = (x46 + x36);
+{ uint32_t x49 = (uint32_t) (x48 >> 0x11);
+{ uint32_t x50 = ((uint32_t)x48 & 0x1ffff);
+{ uint64_t x51 = (x49 + x35);
+{ uint32_t x52 = (uint32_t) (x51 >> 0x11);
+{ uint32_t x53 = ((uint32_t)x51 & 0x1ffff);
+{ uint64_t x54 = (x52 + x34);
+{ uint32_t x55 = (uint32_t) (x54 >> 0x11);
+{ uint32_t x56 = ((uint32_t)x54 & 0x1ffff);
+{ uint64_t x57 = (x55 + x33);
+{ uint32_t x58 = (uint32_t) (x57 >> 0x11);
+{ uint32_t x59 = ((uint32_t)x57 & 0x1ffff);
+{ uint64_t x60 = (x58 + x32);
+{ uint32_t x61 = (uint32_t) (x60 >> 0x11);
+{ uint32_t x62 = ((uint32_t)x60 & 0x1ffff);
+{ uint32_t x63 = (x41 + (0xd * x61)); // wrap-around: the last carry is scaled by 0xd and added back onto the first limb, in 32-bit arithmetic. NOTE(review): assumes 0xd * x61 does not wrap; confirm against input bounds.
+{ uint32_t x64 = (x63 >> 0x12); // short second carry pass over the first two limbs
+{ uint32_t x65 = (x63 & 0x3ffff);
+{ uint32_t x66 = (x64 + x44);
+{ uint32_t x67 = (x66 >> 0x11);
+{ uint32_t x68 = (x66 & 0x1ffff);
+out[0] = x62; // out[0] holds the limb from the last-carried sum (x32); out[7] the limb from the first (x39)
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // final carry is added without a further mask, so this limb may exceed 17 bits -- NOTE(review): presumably intended; confirm against the generator's bounds
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e137m13/femul.h b/src/Specific/solinas32_2e137m13/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas32_2e137m13/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for femul: an output pointer followed by sixteen uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e137m13/fesquare.c b/src/Specific/solinas32_2e137m13/fesquare.c
new file mode 100644
index 000000000..805e9790e
--- /dev/null
+++ b/src/Specific/solinas32_2e137m13/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squaring routine: every product below pairs two of the eight inputs; eight limbs are written to out[0..7]. Directory name suggests arithmetic mod 2^137-13 -- TODO confirm.
+{ uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2)))))))); // x15..x22: 64-bit sums of pairwise input products; some terms are doubled (0x2) and some groups are scaled by 0xd
+{ uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xd * ((uint64_t)x13 * x13)));
+{ uint64_t x17 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xd * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
+{ uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xd * (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12)))));
+{ uint64_t x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xd * (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10))))));
+{ uint64_t x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xd * (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
+{ uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xd * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
+{ uint64_t x22 = (((uint64_t)x2 * x2) + (0xd * ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4))))))))));
+{ uint32_t x23 = (uint32_t) (x22 >> 0x12); // carry chain starts at x22: split at bit 18 (mask 0x3ffff). NOTE(review): the uint32_t casts assume each shifted value fits in 32 bits; that depends on input bounds not visible here.
+{ uint32_t x24 = ((uint32_t)x22 & 0x3ffff);
+{ uint64_t x25 = (x23 + x21); // each carry is added into the next sum, which is then split at bit 17 (mask 0x1ffff)
+{ uint32_t x26 = (uint32_t) (x25 >> 0x11);
+{ uint32_t x27 = ((uint32_t)x25 & 0x1ffff);
+{ uint64_t x28 = (x26 + x20);
+{ uint32_t x29 = (uint32_t) (x28 >> 0x11);
+{ uint32_t x30 = ((uint32_t)x28 & 0x1ffff);
+{ uint64_t x31 = (x29 + x19);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x11);
+{ uint32_t x33 = ((uint32_t)x31 & 0x1ffff);
+{ uint64_t x34 = (x32 + x18);
+{ uint32_t x35 = (uint32_t) (x34 >> 0x11);
+{ uint32_t x36 = ((uint32_t)x34 & 0x1ffff);
+{ uint64_t x37 = (x35 + x17);
+{ uint32_t x38 = (uint32_t) (x37 >> 0x11);
+{ uint32_t x39 = ((uint32_t)x37 & 0x1ffff);
+{ uint64_t x40 = (x38 + x16);
+{ uint32_t x41 = (uint32_t) (x40 >> 0x11);
+{ uint32_t x42 = ((uint32_t)x40 & 0x1ffff);
+{ uint64_t x43 = (x41 + x15);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x11);
+{ uint32_t x45 = ((uint32_t)x43 & 0x1ffff);
+{ uint32_t x46 = (x24 + (0xd * x44)); // wrap-around: the last carry is scaled by 0xd and added back onto the first limb, in 32-bit arithmetic. NOTE(review): assumes 0xd * x44 does not wrap; confirm against input bounds.
+{ uint32_t x47 = (x46 >> 0x12); // short second carry pass over the first two limbs
+{ uint32_t x48 = (x46 & 0x3ffff);
+{ uint32_t x49 = (x47 + x27);
+{ uint32_t x50 = (x49 >> 0x11);
+{ uint32_t x51 = (x49 & 0x1ffff);
+out[0] = x45; // out[0] holds the limb from the last-carried sum (x15); out[7] the limb from the first (x22)
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // final carry is added without a further mask, so this limb may exceed 17 bits -- NOTE(review): presumably intended; confirm against the generator's bounds
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e137m13/fesquare.h b/src/Specific/solinas32_2e137m13/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas32_2e137m13/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare: an output pointer followed by eight uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e137m13/freeze.c b/src/Specific/solinas32_2e137m13/freeze.c
new file mode 100644
index 000000000..041ae06fe
--- /dev/null
+++ b/src/Specific/solinas32_2e137m13/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace and declarations appear on the right-hand side of assignments. It looks like code-generator output that was never lowered to C; regenerate rather than hand-edit.
+out[0] = uint32_t x16; // a declaration used as an expression; will not compile
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 18 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // "Op Syntax.SubWithGetBorrow ..." is generator syntax, not a C expression
+out[2] = x2;
+out[3] = 0x3fff3;; // doubled semicolon in the generated text
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e137m13/freeze.h b/src/Specific/solinas32_2e137m13/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas32_2e137m13/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze: an output pointer followed by eight uint64_t operands. NOTE(review): force_inline expands to always_inline, but this header exposes no body -- confirm callers also see the definition.
diff --git a/src/Specific/solinas32_2e140m27/femul.c b/src/Specific/solinas32_2e140m27/femul.c
new file mode 100644
index 000000000..387a48ea0
--- /dev/null
+++ b/src/Specific/solinas32_2e140m27/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15))))));
+{ uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x1b * ((uint64_t)x12 * x22)));
+{ uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0x1b * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23)))));
+{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x1b * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
+{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x1b * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x1b * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
+{ uint64_t x30 = (x29 >> 0x18);
+{ uint32_t x31 = ((uint32_t)x29 & 0xffffff);
+{ uint64_t x32 = (x30 + x28);
+{ uint64_t x33 = (x32 >> 0x17);
+{ uint32_t x34 = ((uint32_t)x32 & 0x7fffff);
+{ uint64_t x35 = (x33 + x27);
+{ uint64_t x36 = (x35 >> 0x17);
+{ uint32_t x37 = ((uint32_t)x35 & 0x7fffff);
+{ uint64_t x38 = (x36 + x26);
+{ uint64_t x39 = (x38 >> 0x18);
+{ uint32_t x40 = ((uint32_t)x38 & 0xffffff);
+{ uint64_t x41 = (x39 + x25);
+{ uint32_t x42 = (uint32_t) (x41 >> 0x17);
+{ uint32_t x43 = ((uint32_t)x41 & 0x7fffff);
+{ uint64_t x44 = (x42 + x24);
+{ uint32_t x45 = (uint32_t) (x44 >> 0x17);
+{ uint32_t x46 = ((uint32_t)x44 & 0x7fffff);
+{ uint64_t x47 = (x31 + ((uint64_t)0x1b * x45));
+{ uint32_t x48 = (uint32_t) (x47 >> 0x18);
+{ uint32_t x49 = ((uint32_t)x47 & 0xffffff);
+{ uint32_t x50 = (x48 + x34);
+{ uint32_t x51 = (x50 >> 0x17);
+{ uint32_t x52 = (x50 & 0x7fffff);
+out[0] = x46;
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37;
+out[4] = x52;
+out[5] = x49;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e140m27/femul.h b/src/Specific/solinas32_2e140m27/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas32_2e140m27/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/solinas32_2e140m27/fesquare.c b/src/Specific/solinas32_2e140m27/fesquare.c
new file mode 100644
index 000000000..07733e6c3
--- /dev/null
+++ b/src/Specific/solinas32_2e140m27/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2))))));
+{ uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x1b * ((uint64_t)x9 * x9)));
+{ uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x1b * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10)))));
+{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x1b * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
+{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1b * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x1b * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
+{ uint64_t x17 = (x16 >> 0x18);
+{ uint32_t x18 = ((uint32_t)x16 & 0xffffff);
+{ uint64_t x19 = (x17 + x15);
+{ uint64_t x20 = (x19 >> 0x17);
+{ uint32_t x21 = ((uint32_t)x19 & 0x7fffff);
+{ uint64_t x22 = (x20 + x14);
+{ uint64_t x23 = (x22 >> 0x17);
+{ uint32_t x24 = ((uint32_t)x22 & 0x7fffff);
+{ uint64_t x25 = (x23 + x13);
+{ uint64_t x26 = (x25 >> 0x18);
+{ uint32_t x27 = ((uint32_t)x25 & 0xffffff);
+{ uint64_t x28 = (x26 + x12);
+{ uint32_t x29 = (uint32_t) (x28 >> 0x17);
+{ uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
+{ uint64_t x31 = (x29 + x11);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x17);
+{ uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
+{ uint64_t x34 = (x18 + ((uint64_t)0x1b * x32));
+{ uint32_t x35 = (uint32_t) (x34 >> 0x18);
+{ uint32_t x36 = ((uint32_t)x34 & 0xffffff);
+{ uint32_t x37 = (x35 + x21);
+{ uint32_t x38 = (x37 >> 0x17);
+{ uint32_t x39 = (x37 & 0x7fffff);
+out[0] = x33;
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24;
+out[4] = x39;
+out[5] = x36;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e140m27/fesquare.h b/src/Specific/solinas32_2e140m27/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas32_2e140m27/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e140m27/freeze.c b/src/Specific/solinas32_2e140m27/freeze.c
new file mode 100644
index 000000000..6d3117413
--- /dev/null
+++ b/src/Specific/solinas32_2e140m27/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x12;
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffe5;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e140m27/freeze.h b/src/Specific/solinas32_2e140m27/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas32_2e140m27/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e141m9/femul.c b/src/Specific/solinas32_2e141m9/femul.c
new file mode 100644
index 000000000..edbb8a1e1
--- /dev/null
+++ b/src/Specific/solinas32_2e141m9/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
+{ uint64_t x25 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + (((uint64_t)x9 * x19) + ((0x2 * ((uint64_t)x11 * x17)) + ((uint64_t)x13 * x15))))) + (0x9 * (0x2 * ((uint64_t)x12 * x22))));
+{ uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x9 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
+{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x9 * ((0x2 * ((uint64_t)x11 * x22)) + (((uint64_t)x13 * x23) + (0x2 * ((uint64_t)x12 * x21))))));
+{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x9 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x9 * ((0x2 * ((uint64_t)x7 * x22)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + (((uint64_t)x13 * x19) + (0x2 * ((uint64_t)x12 * x17))))))));
+{ uint64_t x30 = (x29 >> 0x18);
+{ uint32_t x31 = ((uint32_t)x29 & 0xffffff);
+{ uint64_t x32 = (x30 + x28);
+{ uint64_t x33 = (x32 >> 0x17);
+{ uint32_t x34 = ((uint32_t)x32 & 0x7fffff);
+{ uint64_t x35 = (x33 + x27);
+{ uint32_t x36 = (uint32_t) (x35 >> 0x18);
+{ uint32_t x37 = ((uint32_t)x35 & 0xffffff);
+{ uint64_t x38 = (x36 + x26);
+{ uint32_t x39 = (uint32_t) (x38 >> 0x17);
+{ uint32_t x40 = ((uint32_t)x38 & 0x7fffff);
+{ uint64_t x41 = (x39 + x25);
+{ uint32_t x42 = (uint32_t) (x41 >> 0x18);
+{ uint32_t x43 = ((uint32_t)x41 & 0xffffff);
+{ uint64_t x44 = (x42 + x24);
+{ uint32_t x45 = (uint32_t) (x44 >> 0x17);
+{ uint32_t x46 = ((uint32_t)x44 & 0x7fffff);
+{ uint64_t x47 = (x31 + ((uint64_t)0x9 * x45));
+{ uint32_t x48 = (uint32_t) (x47 >> 0x18);
+{ uint32_t x49 = ((uint32_t)x47 & 0xffffff);
+{ uint32_t x50 = (x48 + x34);
+{ uint32_t x51 = (x50 >> 0x17);
+{ uint32_t x52 = (x50 & 0x7fffff);
+out[0] = x46;
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37;
+out[4] = x52;
+out[5] = x49;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e141m9/femul.h b/src/Specific/solinas32_2e141m9/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas32_2e141m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/solinas32_2e141m9/fesquare.c b/src/Specific/solinas32_2e141m9/fesquare.c
new file mode 100644
index 000000000..1b26664f6
--- /dev/null
+++ b/src/Specific/solinas32_2e141m9/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
+{ uint64_t x12 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (0x2 * ((uint64_t)x9 * x9))));
+{ uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
+{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * ((0x2 * ((uint64_t)x8 * x9)) + (((uint64_t)x10 * x10) + (0x2 * ((uint64_t)x9 * x8))))));
+{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x9)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + (0x2 * ((uint64_t)x9 * x4))))))));
+{ uint64_t x17 = (x16 >> 0x18);
+{ uint32_t x18 = ((uint32_t)x16 & 0xffffff);
+{ uint64_t x19 = (x17 + x15);
+{ uint64_t x20 = (x19 >> 0x17);
+{ uint32_t x21 = ((uint32_t)x19 & 0x7fffff);
+{ uint64_t x22 = (x20 + x14);
+{ uint32_t x23 = (uint32_t) (x22 >> 0x18);
+{ uint32_t x24 = ((uint32_t)x22 & 0xffffff);
+{ uint64_t x25 = (x23 + x13);
+{ uint32_t x26 = (uint32_t) (x25 >> 0x17);
+{ uint32_t x27 = ((uint32_t)x25 & 0x7fffff);
+{ uint64_t x28 = (x26 + x12);
+{ uint32_t x29 = (uint32_t) (x28 >> 0x18);
+{ uint32_t x30 = ((uint32_t)x28 & 0xffffff);
+{ uint64_t x31 = (x29 + x11);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x17);
+{ uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
+{ uint64_t x34 = (x18 + ((uint64_t)0x9 * x32));
+{ uint32_t x35 = (uint32_t) (x34 >> 0x18);
+{ uint32_t x36 = ((uint32_t)x34 & 0xffffff);
+{ uint32_t x37 = (x35 + x21);
+{ uint32_t x38 = (x37 >> 0x17);
+{ uint32_t x39 = (x37 & 0x7fffff);
+out[0] = x33;
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24;
+out[4] = x39;
+out[5] = x36;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e141m9/fesquare.h b/src/Specific/solinas32_2e141m9/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas32_2e141m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e141m9/freeze.c b/src/Specific/solinas32_2e141m9/freeze.c
new file mode 100644
index 000000000..cc7f82bb3
--- /dev/null
+++ b/src/Specific/solinas32_2e141m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x12;
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xfffff7;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e141m9/freeze.h b/src/Specific/solinas32_2e141m9/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas32_2e141m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e150m3/femul.c b/src/Specific/solinas32_2e150m3/femul.c
new file mode 100644
index 000000000..6b0d36b25
--- /dev/null
+++ b/src/Specific/solinas32_2e150m3/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
+{ ℤ x20 = (((uint64_t)x5 * x18) +ℤ (((uint64_t)x7 * x19) +ℤ (((uint64_t)x9 * x17) +ℤ (((uint64_t)x11 * x15) +ℤ ((uint64_t)x10 * x13)))));
+{ ℤ x21 = ((((uint64_t)x5 * x19) +ℤ (((uint64_t)x7 * x17) +ℤ (((uint64_t)x9 * x15) +ℤ ((uint64_t)x11 * x13)))) +ℤ (0x3 *ℤ ((uint64_t)x10 * x18)));
+{ ℤ x22 = ((((uint64_t)x5 * x17) +ℤ (((uint64_t)x7 * x15) +ℤ ((uint64_t)x9 * x13))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x18) +ℤ ((uint64_t)x10 * x19))));
+{ ℤ x23 = ((((uint64_t)x5 * x15) +ℤ ((uint64_t)x7 * x13)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x18) +ℤ (((uint64_t)x11 * x19) +ℤ ((uint64_t)x10 * x17)))));
+{ ℤ x24 = (((uint64_t)x5 * x13) +ℤ (0x3 *ℤ (((uint64_t)x7 * x18) +ℤ (((uint64_t)x9 * x19) +ℤ (((uint64_t)x11 * x17) +ℤ ((uint64_t)x10 * x15))))));
+{ uint64_t x25 = (x24 >> 0x1e);
+{ uint32_t x26 = (x24 & 0x3fffffff);
+{ ℤ x27 = (x25 +ℤ x23);
+{ uint64_t x28 = (x27 >> 0x1e);
+{ uint32_t x29 = (x27 & 0x3fffffff);
+{ ℤ x30 = (x28 +ℤ x22);
+{ uint64_t x31 = (x30 >> 0x1e);
+{ uint32_t x32 = (x30 & 0x3fffffff);
+{ ℤ x33 = (x31 +ℤ x21);
+{ uint64_t x34 = (x33 >> 0x1e);
+{ uint32_t x35 = (x33 & 0x3fffffff);
+{ ℤ x36 = (x34 +ℤ x20);
+{ uint64_t x37 = (x36 >> 0x1e);
+{ uint32_t x38 = (x36 & 0x3fffffff);
+{ uint64_t x39 = (x26 + (0x3 * x37));
+{ uint32_t x40 = (uint32_t) (x39 >> 0x1e);
+{ uint32_t x41 = ((uint32_t)x39 & 0x3fffffff);
+{ uint32_t x42 = (x40 + x29);
+{ uint32_t x43 = (x42 >> 0x1e);
+{ uint32_t x44 = (x42 & 0x3fffffff);
+out[0] = x38;
+out[1] = x35;
+out[2] = x43 + x32;
+out[3] = x44;
+out[4] = x41;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas32_2e150m3/femul.h b/src/Specific/solinas32_2e150m3/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas32_2e150m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/solinas32_2e150m3/fesquare.c b/src/Specific/solinas32_2e150m3/fesquare.c
new file mode 100644
index 000000000..e328fb2f2
--- /dev/null
+++ b/src/Specific/solinas32_2e150m3/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ ℤ x9 = (((uint64_t)x2 * x7) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x7 * x2)))));
+{ ℤ x10 = ((((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ ((uint64_t)x7 * x7)));
+{ ℤ x11 = ((((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x7) +ℤ ((uint64_t)x7 * x8))));
+{ ℤ x12 = ((((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x7) +ℤ (((uint64_t)x8 * x8) +ℤ ((uint64_t)x7 * x6)))));
+{ ℤ x13 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x7) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ ((uint64_t)x7 * x4))))));
+{ uint64_t x14 = (x13 >> 0x1e);
+{ uint32_t x15 = (x13 & 0x3fffffff);
+{ ℤ x16 = (x14 +ℤ x12);
+{ uint64_t x17 = (x16 >> 0x1e);
+{ uint32_t x18 = (x16 & 0x3fffffff);
+{ ℤ x19 = (x17 +ℤ x11);
+{ uint64_t x20 = (x19 >> 0x1e);
+{ uint32_t x21 = (x19 & 0x3fffffff);
+{ ℤ x22 = (x20 +ℤ x10);
+{ uint64_t x23 = (x22 >> 0x1e);
+{ uint32_t x24 = (x22 & 0x3fffffff);
+{ ℤ x25 = (x23 +ℤ x9);
+{ uint64_t x26 = (x25 >> 0x1e);
+{ uint32_t x27 = (x25 & 0x3fffffff);
+{ uint64_t x28 = (x15 + (0x3 * x26));
+{ uint32_t x29 = (uint32_t) (x28 >> 0x1e);
+{ uint32_t x30 = ((uint32_t)x28 & 0x3fffffff);
+{ uint32_t x31 = (x29 + x18);
+{ uint32_t x32 = (x31 >> 0x1e);
+{ uint32_t x33 = (x31 & 0x3fffffff);
+out[0] = x27;
+out[1] = x24;
+out[2] = x32 + x21;
+out[3] = x33;
+out[4] = x30;
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas32_2e150m3/fesquare.h b/src/Specific/solinas32_2e150m3/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas32_2e150m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e150m3/freeze.c b/src/Specific/solinas32_2e150m3/freeze.c
new file mode 100644
index 000000000..624c3b4d9
--- /dev/null
+++ b/src/Specific/solinas32_2e150m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x10;
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 30 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x3ffffffd;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e150m3/freeze.h b/src/Specific/solinas32_2e150m3/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas32_2e150m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e150m5/femul.c b/src/Specific/solinas32_2e150m5/femul.c
new file mode 100644
index 000000000..2100f2db0
--- /dev/null
+++ b/src/Specific/solinas32_2e150m5/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ uint64_t x24 = (((uint64_t)x5 * x22) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15))))));
+{ uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x5 * ((uint64_t)x12 * x22)));
+{ uint64_t x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) + (0x5 * (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
+{ uint64_t x27 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + ((uint64_t)x9 * x15))) + (0x5 * (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21)))));
+{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x5 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x5 * (((uint64_t)x7 * x22) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x12 * x17)))))));
+{ uint64_t x30 = (x29 >> 0x19);
+{ uint32_t x31 = ((uint32_t)x29 & 0x1ffffff);
+{ uint64_t x32 = (x30 + x28);
+{ uint64_t x33 = (x32 >> 0x19);
+{ uint32_t x34 = ((uint32_t)x32 & 0x1ffffff);
+{ uint64_t x35 = (x33 + x27);
+{ uint64_t x36 = (x35 >> 0x19);
+{ uint32_t x37 = ((uint32_t)x35 & 0x1ffffff);
+{ uint64_t x38 = (x36 + x26);
+{ uint64_t x39 = (x38 >> 0x19);
+{ uint32_t x40 = ((uint32_t)x38 & 0x1ffffff);
+{ uint64_t x41 = (x39 + x25);
+{ uint32_t x42 = (uint32_t) (x41 >> 0x19);
+{ uint32_t x43 = ((uint32_t)x41 & 0x1ffffff);
+{ uint64_t x44 = (x42 + x24);
+{ uint32_t x45 = (uint32_t) (x44 >> 0x19);
+{ uint32_t x46 = ((uint32_t)x44 & 0x1ffffff);
+{ uint64_t x47 = (x31 + ((uint64_t)0x5 * x45));
+{ uint32_t x48 = (uint32_t) (x47 >> 0x19);
+{ uint32_t x49 = ((uint32_t)x47 & 0x1ffffff);
+{ uint32_t x50 = (x48 + x34);
+{ uint32_t x51 = (x50 >> 0x19);
+{ uint32_t x52 = (x50 & 0x1ffffff);
+out[0] = x46;
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37;
+out[4] = x52;
+out[5] = x49;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e150m5/femul.h b/src/Specific/solinas32_2e150m5/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas32_2e150m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/solinas32_2e150m5/fesquare.c b/src/Specific/solinas32_2e150m5/fesquare.c
new file mode 100644
index 000000000..f45942450
--- /dev/null
+++ b/src/Specific/solinas32_2e150m5/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x11 = (((uint64_t)x2 * x9) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2))))));
+{ uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x5 * ((uint64_t)x9 * x9)));
+{ uint64_t x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
+{ uint64_t x14 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8)))));
+{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x5 * (((uint64_t)x4 * x9) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((uint64_t)x9 * x4)))))));
+{ uint64_t x17 = (x16 >> 0x19);
+{ uint32_t x18 = ((uint32_t)x16 & 0x1ffffff);
+{ uint64_t x19 = (x17 + x15);
+{ uint64_t x20 = (x19 >> 0x19);
+{ uint32_t x21 = ((uint32_t)x19 & 0x1ffffff);
+{ uint64_t x22 = (x20 + x14);
+{ uint64_t x23 = (x22 >> 0x19);
+{ uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
+{ uint64_t x25 = (x23 + x13);
+{ uint64_t x26 = (x25 >> 0x19);
+{ uint32_t x27 = ((uint32_t)x25 & 0x1ffffff);
+{ uint64_t x28 = (x26 + x12);
+{ uint32_t x29 = (uint32_t) (x28 >> 0x19);
+{ uint32_t x30 = ((uint32_t)x28 & 0x1ffffff);
+{ uint64_t x31 = (x29 + x11);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x19);
+{ uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
+{ uint64_t x34 = (x18 + ((uint64_t)0x5 * x32));
+{ uint32_t x35 = (uint32_t) (x34 >> 0x19);
+{ uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
+{ uint32_t x37 = (x35 + x21);
+{ uint32_t x38 = (x37 >> 0x19);
+{ uint32_t x39 = (x37 & 0x1ffffff);
+out[0] = x33;
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24;
+out[4] = x39;
+out[5] = x36;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e150m5/fesquare.h b/src/Specific/solinas32_2e150m5/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas32_2e150m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e150m5/freeze.c b/src/Specific/solinas32_2e150m5/freeze.c
new file mode 100644
index 000000000..4dee1e6d1
--- /dev/null
+++ b/src/Specific/solinas32_2e150m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x12;
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 25 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x1fffffb;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e150m5/freeze.h b/src/Specific/solinas32_2e150m5/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas32_2e150m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for freeze: an output pointer followed by six uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e152m17/femul.c b/src/Specific/solinas32_2e152m17/femul.c
new file mode 100644
index 000000000..68895917e
--- /dev/null
+++ b/src/Specific/solinas32_2e152m17/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: forms the product of two 6-limb operands (x12,x13,x11,x9,x7,x5 and x22,x23,x21,x19,x17,x15) and stores six partially carried limbs in out[0..5]. The 0x11 (17) factors and the 26/25/25/26/25/25-bit limb split are consistent with the 2^152 - 17 modulus named by the directory (TODO confirm).
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15)))))); // x24..x29 are the six 64-bit column sums; x25..x29 each add a group of products scaled by 0x11
+{ uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0x11 * ((uint64_t)x12 * x22)));
+{ uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0x11 * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23)))));
+{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0x11 * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
+{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0x11 * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+{ uint64_t x29 = (((uint64_t)x5 * x15) + (0x11 * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
+{ uint64_t x30 = (x29 >> 0x1a); // carry chain starts at column x29: everything above the low 26 bits moves up
+{ uint32_t x31 = ((uint32_t)x29 & 0x3ffffff); // low 26 bits of x29
+{ uint64_t x32 = (x30 + x28); // carry joins column x28
+{ uint64_t x33 = (x32 >> 0x19); // this limb keeps 25 bits
+{ uint32_t x34 = ((uint32_t)x32 & 0x1ffffff);
+{ uint64_t x35 = (x33 + x27); // carry joins column x27
+{ uint64_t x36 = (x35 >> 0x19);
+{ uint32_t x37 = ((uint32_t)x35 & 0x1ffffff);
+{ uint64_t x38 = (x36 + x26); // carry joins column x26 (26-bit limb)
+{ uint64_t x39 = (x38 >> 0x1a);
+{ uint32_t x40 = ((uint32_t)x38 & 0x3ffffff);
+{ uint64_t x41 = (x39 + x25); // carry joins column x25
+{ uint64_t x42 = (x41 >> 0x19);
+{ uint32_t x43 = ((uint32_t)x41 & 0x1ffffff);
+{ uint64_t x44 = (x42 + x24); // carry joins the last column x24
+{ uint64_t x45 = (x44 >> 0x19);
+{ uint32_t x46 = ((uint32_t)x44 & 0x1ffffff);
+{ uint64_t x47 = (x31 + (0x11 * x45)); // carry out of the last column re-enters the first limb scaled by 0x11
+{ uint32_t x48 = (uint32_t) (x47 >> 0x1a); // second, short carry pass; NOTE(review): the cast keeps only 32 bits of the carry - presumably safe under the intended input bounds, confirm
+{ uint32_t x49 = ((uint32_t)x47 & 0x3ffffff);
+{ uint32_t x50 = (x48 + x34); // 32-bit add of that carry into the already masked limb x34
+{ uint32_t x51 = (x50 >> 0x19);
+{ uint32_t x52 = (x50 & 0x1ffffff);
+out[0] = x46; // limb taken from the last column (x24)
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37; // the final carry x51 is added to x37 with no further mask
+out[4] = x52;
+out[5] = x49; // limb taken from the first column (x29) after the 0x11 fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e152m17/femul.h b/src/Specific/solinas32_2e152m17/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas32_2e152m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for femul: an output pointer followed by twelve uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/solinas32_2e152m17/fesquare.c b/src/Specific/solinas32_2e152m17/fesquare.c
new file mode 100644
index 000000000..9f5e71fda
--- /dev/null
+++ b/src/Specific/solinas32_2e152m17/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares one 6-limb operand (x9,x10,x8,x6,x4,x2) and stores six partially carried limbs in out[0..5]. The 0x11 (17) factors and the 26/25/25/26/25/25-bit limb split are consistent with the 2^152 - 17 modulus named by the directory (TODO confirm).
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2)))))); // x11..x16 are the six 64-bit column sums; each cross product is written out twice rather than folded into one doubled term
+{ uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * ((uint64_t)x9 * x9)));
+{ uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x11 * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10)))));
+{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
+{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+{ uint64_t x16 = (((uint64_t)x2 * x2) + (0x11 * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
+{ uint64_t x17 = (x16 >> 0x1a); // carry chain starts at column x16: everything above the low 26 bits moves up
+{ uint32_t x18 = ((uint32_t)x16 & 0x3ffffff); // low 26 bits of x16
+{ uint64_t x19 = (x17 + x15); // carry joins column x15
+{ uint64_t x20 = (x19 >> 0x19); // this limb keeps 25 bits
+{ uint32_t x21 = ((uint32_t)x19 & 0x1ffffff);
+{ uint64_t x22 = (x20 + x14); // carry joins column x14
+{ uint64_t x23 = (x22 >> 0x19);
+{ uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
+{ uint64_t x25 = (x23 + x13); // carry joins column x13 (26-bit limb)
+{ uint64_t x26 = (x25 >> 0x1a);
+{ uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
+{ uint64_t x28 = (x26 + x12); // carry joins column x12
+{ uint64_t x29 = (x28 >> 0x19);
+{ uint32_t x30 = ((uint32_t)x28 & 0x1ffffff);
+{ uint64_t x31 = (x29 + x11); // carry joins the last column x11
+{ uint64_t x32 = (x31 >> 0x19);
+{ uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
+{ uint64_t x34 = (x18 + (0x11 * x32)); // carry out of the last column re-enters the first limb scaled by 0x11
+{ uint32_t x35 = (uint32_t) (x34 >> 0x1a); // second, short carry pass; NOTE(review): the cast keeps only 32 bits of the carry - presumably safe under the intended input bounds, confirm
+{ uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
+{ uint32_t x37 = (x35 + x21); // 32-bit add of that carry into the already masked limb x21
+{ uint32_t x38 = (x37 >> 0x19);
+{ uint32_t x39 = (x37 & 0x1ffffff);
+out[0] = x33; // limb taken from the last column (x11)
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // the final carry x38 is added to x24 with no further mask
+out[4] = x39;
+out[5] = x36; // limb taken from the first column (x16) after the 0x11 fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e152m17/fesquare.h b/src/Specific/solinas32_2e152m17/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas32_2e152m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for fesquare: an output pointer followed by six uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e152m17/freeze.c b/src/Specific/solinas32_2e152m17/freeze.c
new file mode 100644
index 000000000..e26984208
--- /dev/null
+++ b/src/Specific/solinas32_2e152m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): the freeze body below is not valid C - there is no opening brace and it still contains an unlowered "Op Syntax.SubWithGetBorrow" term. It looks like unfinished generator output and probably needs regenerating (TODO confirm).
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x12; // a declaration used where an expression is required; x12 has no initializer
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // not C syntax: reads like a printed expression node for a 26-bit subtract-with-borrow
+out[2] = x2; // x2 is the last by-value parameter
+out[3] = 0x3ffffef;; // 0x3ffffef == 2^26 - 17; note the stray second semicolon
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e152m17/freeze.h b/src/Specific/solinas32_2e152m17/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas32_2e152m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for freeze: an output pointer followed by six uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e158m15/femul.c b/src/Specific/solinas32_2e158m15/femul.c
new file mode 100644
index 000000000..0af9ac6da
--- /dev/null
+++ b/src/Specific/solinas32_2e158m15/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: forms the product of two 6-limb operands (x12,x13,x11,x9,x7,x5 and x22,x23,x21,x19,x17,x15) and stores six partially carried limbs in out[0..5]. The 0xf (15) factors and the 27/26/26/27/26/26-bit limb split are consistent with the 2^158 - 15 modulus named by the directory (TODO confirm).
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ uint64_t x24 = (((uint64_t)x5 * x22) + ((0x2 * ((uint64_t)x7 * x23)) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((0x2 * ((uint64_t)x13 * x17)) + ((uint64_t)x12 * x15)))))); // x24..x29 are the six 64-bit column sums; x25..x29 each add a group of products scaled by 0xf
+{ uint64_t x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) + (0xf * ((uint64_t)x12 * x22)));
+{ uint64_t x26 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((0x2 * ((uint64_t)x9 * x17)) + ((uint64_t)x11 * x15)))) + (0xf * ((0x2 * ((uint64_t)x13 * x22)) + (0x2 * ((uint64_t)x12 * x23)))));
+{ uint64_t x27 = ((((uint64_t)x5 * x19) + ((0x2 * ((uint64_t)x7 * x17)) + ((uint64_t)x9 * x15))) + (0xf * (((uint64_t)x11 * x22) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x12 * x21)))));
+{ uint64_t x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) + (0xf * (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+{ uint64_t x29 = (((uint64_t)x5 * x15) + (0xf * ((0x2 * ((uint64_t)x7 * x22)) + ((0x2 * ((uint64_t)x9 * x23)) + (((uint64_t)x11 * x21) + ((0x2 * ((uint64_t)x13 * x19)) + (0x2 * ((uint64_t)x12 * x17))))))));
+{ uint64_t x30 = (x29 >> 0x1b); // carry chain starts at column x29: everything above the low 27 bits moves up
+{ uint32_t x31 = ((uint32_t)x29 & 0x7ffffff); // low 27 bits of x29
+{ uint64_t x32 = (x30 + x28); // carry joins column x28
+{ uint64_t x33 = (x32 >> 0x1a); // this limb keeps 26 bits
+{ uint32_t x34 = ((uint32_t)x32 & 0x3ffffff);
+{ uint64_t x35 = (x33 + x27); // carry joins column x27
+{ uint64_t x36 = (x35 >> 0x1a);
+{ uint32_t x37 = ((uint32_t)x35 & 0x3ffffff);
+{ uint64_t x38 = (x36 + x26); // carry joins column x26 (27-bit limb)
+{ uint64_t x39 = (x38 >> 0x1b);
+{ uint32_t x40 = ((uint32_t)x38 & 0x7ffffff);
+{ uint64_t x41 = (x39 + x25); // carry joins column x25
+{ uint64_t x42 = (x41 >> 0x1a);
+{ uint32_t x43 = ((uint32_t)x41 & 0x3ffffff);
+{ uint64_t x44 = (x42 + x24); // carry joins the last column x24
+{ uint64_t x45 = (x44 >> 0x1a);
+{ uint32_t x46 = ((uint32_t)x44 & 0x3ffffff);
+{ uint64_t x47 = (x31 + (0xf * x45)); // carry out of the last column re-enters the first limb scaled by 0xf
+{ uint32_t x48 = (uint32_t) (x47 >> 0x1b); // second, short carry pass; NOTE(review): the cast keeps only 32 bits of the carry - presumably safe under the intended input bounds, confirm
+{ uint32_t x49 = ((uint32_t)x47 & 0x7ffffff);
+{ uint32_t x50 = (x48 + x34); // 32-bit add of that carry into the already masked limb x34
+{ uint32_t x51 = (x50 >> 0x1a);
+{ uint32_t x52 = (x50 & 0x3ffffff);
+out[0] = x46; // limb taken from the last column (x24)
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37; // the final carry x51 is added to x37 with no further mask
+out[4] = x52;
+out[5] = x49; // limb taken from the first column (x29) after the 0xf fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e158m15/femul.h b/src/Specific/solinas32_2e158m15/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas32_2e158m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for femul: an output pointer followed by twelve uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/solinas32_2e158m15/fesquare.c b/src/Specific/solinas32_2e158m15/fesquare.c
new file mode 100644
index 000000000..81cdfb2c9
--- /dev/null
+++ b/src/Specific/solinas32_2e158m15/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares one 6-limb operand (x9,x10,x8,x6,x4,x2) and stores six partially carried limbs in out[0..5]. The 0xf (15) factors and the 27/26/26/27/26/26-bit limb split are consistent with the 2^158 - 15 modulus named by the directory (TODO confirm).
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x11 = (((uint64_t)x2 * x9) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x9 * x2)))))); // x11..x16 are the six 64-bit column sums; each cross product is written out twice rather than folded into one doubled term
+{ uint64_t x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xf * ((uint64_t)x9 * x9)));
+{ uint64_t x13 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xf * ((0x2 * ((uint64_t)x10 * x9)) + (0x2 * ((uint64_t)x9 * x10)))));
+{ uint64_t x14 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x9) + ((0x2 * ((uint64_t)x10 * x10)) + ((uint64_t)x9 * x8)))));
+{ uint64_t x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+{ uint64_t x16 = (((uint64_t)x2 * x2) + (0xf * ((0x2 * ((uint64_t)x4 * x9)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + (0x2 * ((uint64_t)x9 * x4))))))));
+{ uint64_t x17 = (x16 >> 0x1b); // carry chain starts at column x16: everything above the low 27 bits moves up
+{ uint32_t x18 = ((uint32_t)x16 & 0x7ffffff); // low 27 bits of x16
+{ uint64_t x19 = (x17 + x15); // carry joins column x15
+{ uint64_t x20 = (x19 >> 0x1a); // this limb keeps 26 bits
+{ uint32_t x21 = ((uint32_t)x19 & 0x3ffffff);
+{ uint64_t x22 = (x20 + x14); // carry joins column x14
+{ uint64_t x23 = (x22 >> 0x1a);
+{ uint32_t x24 = ((uint32_t)x22 & 0x3ffffff);
+{ uint64_t x25 = (x23 + x13); // carry joins column x13 (27-bit limb)
+{ uint64_t x26 = (x25 >> 0x1b);
+{ uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
+{ uint64_t x28 = (x26 + x12); // carry joins column x12
+{ uint64_t x29 = (x28 >> 0x1a);
+{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
+{ uint64_t x31 = (x29 + x11); // carry joins the last column x11
+{ uint64_t x32 = (x31 >> 0x1a);
+{ uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
+{ uint64_t x34 = (x18 + (0xf * x32)); // carry out of the last column re-enters the first limb scaled by 0xf
+{ uint32_t x35 = (uint32_t) (x34 >> 0x1b); // second, short carry pass; NOTE(review): the cast keeps only 32 bits of the carry - presumably safe under the intended input bounds, confirm
+{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+{ uint32_t x37 = (x35 + x21); // 32-bit add of that carry into the already masked limb x21
+{ uint32_t x38 = (x37 >> 0x1a);
+{ uint32_t x39 = (x37 & 0x3ffffff);
+out[0] = x33; // limb taken from the last column (x11)
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // the final carry x38 is added to x24 with no further mask
+out[4] = x39;
+out[5] = x36; // limb taken from the first column (x16) after the 0xf fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas32_2e158m15/fesquare.h b/src/Specific/solinas32_2e158m15/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas32_2e158m15/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for fesquare: an output pointer followed by six uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e158m15/freeze.c b/src/Specific/solinas32_2e158m15/freeze.c
new file mode 100644
index 000000000..e57392de5
--- /dev/null
+++ b/src/Specific/solinas32_2e158m15/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): the freeze body below is not valid C - there is no opening brace and it still contains an unlowered "Op Syntax.SubWithGetBorrow" term. It looks like unfinished generator output and probably needs regenerating (TODO confirm).
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x12; // a declaration used where an expression is required; x12 has no initializer
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // not C syntax: reads like a printed expression node for a 27-bit subtract-with-borrow
+out[2] = x2; // x2 is the last by-value parameter
+out[3] = 0x7fffff1;; // 0x7fffff1 == 2^27 - 15; note the stray second semicolon
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e158m15/freeze.h b/src/Specific/solinas32_2e158m15/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas32_2e158m15/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for freeze: an output pointer followed by six uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e165m25/femul.c b/src/Specific/solinas32_2e165m25/femul.c
new file mode 100644
index 000000000..7f0aa5f38
--- /dev/null
+++ b/src/Specific/solinas32_2e165m25/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: forms the product of two 9-limb operands (x18,x19,x17,x15,x13,x11,x9,x7,x5 and x34,x35,x33,x31,x29,x27,x25,x23,x21) and stores nine partially carried limbs in out[0..8]. The 0x19 (25) factors and the repeating 19/18/18-bit limb split are consistent with the 2^165 - 25 modulus named by the directory (TODO confirm).
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
+{ uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21))))))))); // x36..x44 are the nine 64-bit column sums; x37..x44 each add a group of products scaled by 0x19
+{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x19 * ((uint64_t)x18 * x34)));
+{ uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0x19 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))));
+{ uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0x19 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))));
+{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x19 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+{ uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0x19 * ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))));
+{ uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0x19 * (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))));
+{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x19 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x19 * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));
+{ uint32_t x45 = (uint32_t) (x44 >> 0x13); // carry chain starts at column x44; NOTE(review): every carry in this chain is cast to 32 bits - presumably safe under the intended input bounds, confirm
+{ uint32_t x46 = ((uint32_t)x44 & 0x7ffff); // low 19 bits of x44
+{ uint64_t x47 = (x45 + x43); // carry joins column x43
+{ uint32_t x48 = (uint32_t) (x47 >> 0x12); // this limb keeps 18 bits
+{ uint32_t x49 = ((uint32_t)x47 & 0x3ffff);
+{ uint64_t x50 = (x48 + x42); // carry joins column x42
+{ uint32_t x51 = (uint32_t) (x50 >> 0x12);
+{ uint32_t x52 = ((uint32_t)x50 & 0x3ffff);
+{ uint64_t x53 = (x51 + x41); // carry joins column x41 (19-bit limb)
+{ uint32_t x54 = (uint32_t) (x53 >> 0x13);
+{ uint32_t x55 = ((uint32_t)x53 & 0x7ffff);
+{ uint64_t x56 = (x54 + x40); // carry joins column x40
+{ uint32_t x57 = (uint32_t) (x56 >> 0x12);
+{ uint32_t x58 = ((uint32_t)x56 & 0x3ffff);
+{ uint64_t x59 = (x57 + x39); // carry joins column x39
+{ uint32_t x60 = (uint32_t) (x59 >> 0x12);
+{ uint32_t x61 = ((uint32_t)x59 & 0x3ffff);
+{ uint64_t x62 = (x60 + x38); // carry joins column x38 (19-bit limb)
+{ uint32_t x63 = (uint32_t) (x62 >> 0x13);
+{ uint32_t x64 = ((uint32_t)x62 & 0x7ffff);
+{ uint64_t x65 = (x63 + x37); // carry joins column x37
+{ uint32_t x66 = (uint32_t) (x65 >> 0x12);
+{ uint32_t x67 = ((uint32_t)x65 & 0x3ffff);
+{ uint64_t x68 = (x66 + x36); // carry joins the last column x36
+{ uint32_t x69 = (uint32_t) (x68 >> 0x12);
+{ uint32_t x70 = ((uint32_t)x68 & 0x3ffff);
+{ uint32_t x71 = (x46 + (0x19 * x69)); // carry out of the last column re-enters the first limb scaled by 0x19; computed in 32-bit arithmetic
+{ uint32_t x72 = (x71 >> 0x13); // second, short carry pass
+{ uint32_t x73 = (x71 & 0x7ffff);
+{ uint32_t x74 = (x72 + x49); // that carry is added into the already masked limb x49
+{ uint32_t x75 = (x74 >> 0x12);
+{ uint32_t x76 = (x74 & 0x3ffff);
+out[0] = x70; // limb taken from the last column (x36)
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52; // the final carry x75 is added to x52 with no further mask
+out[7] = x76;
+out[8] = x73; // limb taken from the first column (x44) after the 0x19 fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e165m25/femul.h b/src/Specific/solinas32_2e165m25/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas32_2e165m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for femul: an output pointer followed by eighteen uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21);
diff --git a/src/Specific/solinas32_2e165m25/fesquare.c b/src/Specific/solinas32_2e165m25/fesquare.c
new file mode 100644
index 000000000..a35d4eb6e
--- /dev/null
+++ b/src/Specific/solinas32_2e165m25/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares one 9-limb operand (x15,x16,x14,x12,x10,x8,x6,x4,x2) and stores nine partially carried limbs in out[0..8]. The 0x19 (25) factors and the repeating 19/18/18-bit limb split are consistent with the 2^165 - 25 modulus named by the directory (TODO confirm).
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2))))))))); // x17..x25 are the nine 64-bit column sums; each cross product is written out twice rather than folded into one doubled term
+{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x19 * ((uint64_t)x15 * x15)));
+{ uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x19 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))));
+{ uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x19 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))));
+{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+{ uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x19 * ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))));
+{ uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))));
+{ uint32_t x26 = (uint32_t) (x25 >> 0x13); // carry chain starts at column x25; NOTE(review): every carry in this chain is cast to 32 bits - presumably safe under the intended input bounds, confirm
+{ uint32_t x27 = ((uint32_t)x25 & 0x7ffff); // low 19 bits of x25
+{ uint64_t x28 = (x26 + x24); // carry joins column x24
+{ uint32_t x29 = (uint32_t) (x28 >> 0x12); // this limb keeps 18 bits
+{ uint32_t x30 = ((uint32_t)x28 & 0x3ffff);
+{ uint64_t x31 = (x29 + x23); // carry joins column x23
+{ uint32_t x32 = (uint32_t) (x31 >> 0x12);
+{ uint32_t x33 = ((uint32_t)x31 & 0x3ffff);
+{ uint64_t x34 = (x32 + x22); // carry joins column x22 (19-bit limb)
+{ uint32_t x35 = (uint32_t) (x34 >> 0x13);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
+{ uint64_t x37 = (x35 + x21); // carry joins column x21
+{ uint32_t x38 = (uint32_t) (x37 >> 0x12);
+{ uint32_t x39 = ((uint32_t)x37 & 0x3ffff);
+{ uint64_t x40 = (x38 + x20); // carry joins column x20
+{ uint32_t x41 = (uint32_t) (x40 >> 0x12);
+{ uint32_t x42 = ((uint32_t)x40 & 0x3ffff);
+{ uint64_t x43 = (x41 + x19); // carry joins column x19 (19-bit limb)
+{ uint32_t x44 = (uint32_t) (x43 >> 0x13);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
+{ uint64_t x46 = (x44 + x18); // carry joins column x18
+{ uint32_t x47 = (uint32_t) (x46 >> 0x12);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3ffff);
+{ uint64_t x49 = (x47 + x17); // carry joins the last column x17
+{ uint32_t x50 = (uint32_t) (x49 >> 0x12);
+{ uint32_t x51 = ((uint32_t)x49 & 0x3ffff);
+{ uint32_t x52 = (x27 + (0x19 * x50)); // carry out of the last column re-enters the first limb scaled by 0x19; computed in 32-bit arithmetic
+{ uint32_t x53 = (x52 >> 0x13); // second, short carry pass
+{ uint32_t x54 = (x52 & 0x7ffff);
+{ uint32_t x55 = (x53 + x30); // that carry is added into the already masked limb x30
+{ uint32_t x56 = (x55 >> 0x12);
+{ uint32_t x57 = (x55 & 0x3ffff);
+out[0] = x51; // limb taken from the last column (x17)
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33; // the final carry x56 is added to x33 with no further mask
+out[7] = x57;
+out[8] = x54; // limb taken from the first column (x25) after the 0x19 fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e165m25/fesquare.h b/src/Specific/solinas32_2e165m25/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas32_2e165m25/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for fesquare: an output pointer followed by nine uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e165m25/freeze.c b/src/Specific/solinas32_2e165m25/freeze.c
new file mode 100644
index 000000000..592c36e10
--- /dev/null
+++ b/src/Specific/solinas32_2e165m25/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): the freeze body below is not valid C - there is no opening brace and it still contains an unlowered "Op Syntax.SubWithGetBorrow" term. It looks like unfinished generator output and probably needs regenerating (TODO confirm).
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x18; // a declaration used where an expression is required; x18 has no initializer
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 19 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // not C syntax: reads like a printed expression node for a 19-bit subtract-with-borrow
+out[2] = x2; // x2 is the last by-value parameter
+out[3] = 0x7ffe7;; // 0x7ffe7 == 2^19 - 25; note the stray second semicolon
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e165m25/freeze.h b/src/Specific/solinas32_2e165m25/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas32_2e165m25/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for freeze: an output pointer followed by nine uint64_t values passed by value. NOTE(review): force_inline (defined above) expands to always_inline, yet this header carries no body to inline - confirm how callers are meant to consume it.
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e166m5/femul.c b/src/Specific/solinas32_2e166m5/femul.c
new file mode 100644
index 000000000..6efdff2fa
--- /dev/null
+++ b/src/Specific/solinas32_2e166m5/femul.c
@@ -0,0 +1,81 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25)  // Multiplies two 11-limb operands (x22..x5 and x42..x25, lowest limbs x5 and x25 listed last) and writes an 11-limb result to out; presumably arithmetic mod 2^166 - 5 per the directory name - TODO confirm.
+{ uint64_t x44 = (((uint64_t)x5 * x42) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + ((0x2 * ((uint64_t)x17 * x33)) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((0x2 * ((uint64_t)x23 * x27)) + ((uint64_t)x22 * x25)))))))))));  // x44..x54: 64-bit column sums of limb cross products; x44 is the highest column, x54 the lowest
+{ uint64_t x45 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + ((uint64_t)x23 * x25)))))))))) + (0x5 * ((uint64_t)x22 * x42)));  // the 0x5-scaled group is presumably the products that wrapped past the top column (2^166 == 5 mod p) - verify
+{ uint64_t x46 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((uint64_t)x21 * x25))))))))) + (0x5 * (((uint64_t)x23 * x42) + ((uint64_t)x22 * x43))));
+{ uint64_t x47 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((uint64_t)x19 * x25)))))))) + (0x5 * (((uint64_t)x21 * x42) + (((uint64_t)x23 * x43) + ((uint64_t)x22 * x41)))));
+{ uint64_t x48 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((uint64_t)x17 * x25))))))) + (0x5 * (((uint64_t)x19 * x42) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + ((uint64_t)x22 * x39))))));
+{ uint64_t x49 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((uint64_t)x15 * x25)))))) + (0x5 * (((uint64_t)x17 * x42) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + ((uint64_t)x22 * x37)))))));
+{ uint64_t x50 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((uint64_t)x13 * x25))))) + (0x5 * (((uint64_t)x15 * x42) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((uint64_t)x22 * x35))))))));
+{ uint64_t x51 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((uint64_t)x11 * x25)))) + (0x5 * (((uint64_t)x13 * x42) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + ((uint64_t)x22 * x33)))))))));
+{ uint64_t x52 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((uint64_t)x9 * x25))) + (0x5 * (((uint64_t)x11 * x42) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + ((uint64_t)x22 * x31))))))))));
+{ uint64_t x53 = ((((uint64_t)x5 * x27) + ((uint64_t)x7 * x25)) + (0x5 * (((uint64_t)x9 * x42) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((uint64_t)x22 * x29)))))))))));
+{ uint64_t x54 = (((uint64_t)x5 * x25) + (0x5 * ((0x2 * ((uint64_t)x7 * x42)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((0x2 * ((uint64_t)x17 * x35)) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + (0x2 * ((uint64_t)x22 * x27)))))))))))));  // lowest column: x5*x25 plus 0x5 times every remaining doubled product
+{ uint32_t x55 = (uint32_t) (x54 >> 0x10);  // carry chain starts at the lowest column, which keeps 16 bits; the cast assumes the carry fits in 32 bits (depends on input bounds not visible here)
+{ uint32_t x56 = ((uint32_t)x54 & 0xffff);  // low 16 bits of the lowest column
+{ uint64_t x57 = (x55 + x53);  // carry added into the next column; this and every later column keeps 15 bits
+{ uint32_t x58 = (uint32_t) (x57 >> 0xf);
+{ uint32_t x59 = ((uint32_t)x57 & 0x7fff);
+{ uint64_t x60 = (x58 + x52);
+{ uint32_t x61 = (uint32_t) (x60 >> 0xf);
+{ uint32_t x62 = ((uint32_t)x60 & 0x7fff);
+{ uint64_t x63 = (x61 + x51);
+{ uint32_t x64 = (uint32_t) (x63 >> 0xf);
+{ uint32_t x65 = ((uint32_t)x63 & 0x7fff);
+{ uint64_t x66 = (x64 + x50);
+{ uint32_t x67 = (uint32_t) (x66 >> 0xf);
+{ uint32_t x68 = ((uint32_t)x66 & 0x7fff);
+{ uint64_t x69 = (x67 + x49);
+{ uint32_t x70 = (uint32_t) (x69 >> 0xf);
+{ uint32_t x71 = ((uint32_t)x69 & 0x7fff);
+{ uint64_t x72 = (x70 + x48);
+{ uint32_t x73 = (uint32_t) (x72 >> 0xf);
+{ uint32_t x74 = ((uint32_t)x72 & 0x7fff);
+{ uint64_t x75 = (x73 + x47);
+{ uint32_t x76 = (uint32_t) (x75 >> 0xf);
+{ uint32_t x77 = ((uint32_t)x75 & 0x7fff);
+{ uint64_t x78 = (x76 + x46);
+{ uint32_t x79 = (uint32_t) (x78 >> 0xf);
+{ uint32_t x80 = ((uint32_t)x78 & 0x7fff);
+{ uint64_t x81 = (x79 + x45);
+{ uint32_t x82 = (uint32_t) (x81 >> 0xf);
+{ uint32_t x83 = ((uint32_t)x81 & 0x7fff);
+{ uint64_t x84 = (x82 + x44);
+{ uint32_t x85 = (uint32_t) (x84 >> 0xf);  // carry out of the highest column
+{ uint32_t x86 = ((uint32_t)x84 & 0x7fff);
+{ uint32_t x87 = (x56 + (0x5 * x85));  // top carry re-enters the lowest limb scaled by 0x5
+{ uint32_t x88 = (x87 >> 0x10);  // second, short carry pass over the two lowest limbs
+{ uint32_t x89 = (x87 & 0xffff);
+{ uint32_t x90 = (x88 + x59);
+{ uint32_t x91 = (x90 >> 0xf);
+{ uint32_t x92 = (x90 & 0x7fff);
+out[0] = x86;  // out[0] comes from the highest column, out[10] from the lowest
+out[1] = x83;
+out[2] = x80;
+out[3] = x77;
+out[4] = x74;
+out[5] = x71;
+out[6] = x68;
+out[7] = x65;
+out[8] = x91 + x62;  // final carry is added without re-masking, so this limb may exceed 15 bits
+out[9] = x92;
+out[10] = x89;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[11]; all eleven entries out[0..10] are written.
diff --git a/src/Specific/solinas32_2e166m5/femul.h b/src/Specific/solinas32_2e166m5/femul.h
new file mode 100644
index 000000000..ae371cb33
--- /dev/null
+++ b/src/Specific/solinas32_2e166m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x42, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25);  // Prototype: two groups of eleven limb values passed by value, result written through out. NOTE(review): always_inline on a bare prototype can only be honored where the function body is visible to the compiler - confirm how callers pull in the definition.
diff --git a/src/Specific/solinas32_2e166m5/fesquare.c b/src/Specific/solinas32_2e166m5/fesquare.c
new file mode 100644
index 000000000..b9645bca1
--- /dev/null
+++ b/src/Specific/solinas32_2e166m5/fesquare.c
@@ -0,0 +1,81 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // Squares one 11-limb operand (x19..x2, lowest limb x2 listed last) and writes an 11-limb result to out; presumably arithmetic mod 2^166 - 5 per the directory name - TODO confirm.
+{ uint64_t x21 = (((uint64_t)x2 * x19) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x19 * x2)))))))))));  // x21..x31: 64-bit column sums (x21 highest, x31 lowest); symmetric products such as x4*x20 and x20*x4 are both spelled out rather than merged
+{ uint64_t x22 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x5 * ((uint64_t)x19 * x19)));  // the 0x5-scaled group is presumably the products that wrapped past the top column (2^166 == 5 mod p) - verify
+{ uint64_t x23 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x5 * (((uint64_t)x20 * x19) + ((uint64_t)x19 * x20))));
+{ uint64_t x24 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x5 * (((uint64_t)x18 * x19) + (((uint64_t)x20 * x20) + ((uint64_t)x19 * x18)))));
+{ uint64_t x25 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * (((uint64_t)x16 * x19) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((uint64_t)x19 * x16))))));
+{ uint64_t x26 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x19) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + ((uint64_t)x19 * x14)))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x5 * (((uint64_t)x12 * x19) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + ((uint64_t)x19 * x12))))))));
+{ uint64_t x28 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x19) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((uint64_t)x19 * x10)))))))));
+{ uint64_t x29 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x19) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + ((uint64_t)x19 * x8))))))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x19) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + ((uint64_t)x19 * x6)))))))))));
+{ uint64_t x31 = (((uint64_t)x2 * x2) + (0x5 * ((0x2 * ((uint64_t)x4 * x19)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + (0x2 * ((uint64_t)x19 * x4)))))))))))));  // lowest column: x2*x2 plus 0x5 times every remaining doubled product
+{ uint32_t x32 = (uint32_t) (x31 >> 0x10);  // carry chain starts at the lowest column, which keeps 16 bits; the cast assumes the carry fits in 32 bits (depends on input bounds not visible here)
+{ uint32_t x33 = ((uint32_t)x31 & 0xffff);  // low 16 bits of the lowest column
+{ uint64_t x34 = (x32 + x30);  // carry added into the next column; this and every later column keeps 15 bits
+{ uint32_t x35 = (uint32_t) (x34 >> 0xf);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7fff);
+{ uint64_t x37 = (x35 + x29);
+{ uint32_t x38 = (uint32_t) (x37 >> 0xf);
+{ uint32_t x39 = ((uint32_t)x37 & 0x7fff);
+{ uint64_t x40 = (x38 + x28);
+{ uint32_t x41 = (uint32_t) (x40 >> 0xf);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7fff);
+{ uint64_t x43 = (x41 + x27);
+{ uint32_t x44 = (uint32_t) (x43 >> 0xf);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7fff);
+{ uint64_t x46 = (x44 + x26);
+{ uint32_t x47 = (uint32_t) (x46 >> 0xf);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7fff);
+{ uint64_t x49 = (x47 + x25);
+{ uint32_t x50 = (uint32_t) (x49 >> 0xf);
+{ uint32_t x51 = ((uint32_t)x49 & 0x7fff);
+{ uint64_t x52 = (x50 + x24);
+{ uint32_t x53 = (uint32_t) (x52 >> 0xf);
+{ uint32_t x54 = ((uint32_t)x52 & 0x7fff);
+{ uint64_t x55 = (x53 + x23);
+{ uint32_t x56 = (uint32_t) (x55 >> 0xf);
+{ uint32_t x57 = ((uint32_t)x55 & 0x7fff);
+{ uint64_t x58 = (x56 + x22);
+{ uint32_t x59 = (uint32_t) (x58 >> 0xf);
+{ uint32_t x60 = ((uint32_t)x58 & 0x7fff);
+{ uint64_t x61 = (x59 + x21);
+{ uint32_t x62 = (uint32_t) (x61 >> 0xf);  // carry out of the highest column
+{ uint32_t x63 = ((uint32_t)x61 & 0x7fff);
+{ uint32_t x64 = (x33 + (0x5 * x62));  // top carry re-enters the lowest limb scaled by 0x5
+{ uint32_t x65 = (x64 >> 0x10);  // second, short carry pass over the two lowest limbs
+{ uint32_t x66 = (x64 & 0xffff);
+{ uint32_t x67 = (x65 + x36);
+{ uint32_t x68 = (x67 >> 0xf);
+{ uint32_t x69 = (x67 & 0x7fff);
+out[0] = x63;  // out[0] comes from the highest column, out[10] from the lowest
+out[1] = x60;
+out[2] = x57;
+out[3] = x54;
+out[4] = x51;
+out[5] = x48;
+out[6] = x45;
+out[7] = x42;
+out[8] = x68 + x39;  // final carry is added without re-masking, so this limb may exceed 15 bits
+out[9] = x69;
+out[10] = x66;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[11]; all eleven entries out[0..10] are written.
diff --git a/src/Specific/solinas32_2e166m5/fesquare.h b/src/Specific/solinas32_2e166m5/fesquare.h
new file mode 100644
index 000000000..126b42bf3
--- /dev/null
+++ b/src/Specific/solinas32_2e166m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype: eleven limb values passed by value, result written through out. NOTE(review): always_inline on a bare prototype can only be honored where the function body is visible to the compiler - confirm how callers pull in the definition.
diff --git a/src/Specific/solinas32_2e166m5/freeze.c b/src/Specific/solinas32_2e166m5/freeze.c
new file mode 100644
index 000000000..e2c8a95fd
--- /dev/null
+++ b/src/Specific/solinas32_2e166m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): no opening '{' follows this signature and the statements below are not valid C - this looks like unfinished code-generator output and must be regenerated before it can compile.
+out[0] = uint32_t x22;  // a declaration used as an expression; x22 is never defined
+out[1] = uint8_t x23 = Op Syntax.SubWithGetBorrow 16 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;  // raw, unprinted generator term (presumably a 16-bit subtract-with-borrow - TODO confirm)
+out[2] = x2;
+out[3] = 0xfffb;;  // 0xfffb == 2^16 - 5; presumably the low limb of the modulus 2^166 - 5 (per the directory name) - verify
+}
+// caller: uint64_t out[4]; NOTE(review): only four outputs for eleven input limbs - likely another symptom of the truncated generator output.
diff --git a/src/Specific/solinas32_2e166m5/freeze.h b/src/Specific/solinas32_2e166m5/freeze.h
new file mode 100644
index 000000000..ccf1cb263
--- /dev/null
+++ b/src/Specific/solinas32_2e166m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype: eleven limb values passed by value, result written through out. NOTE(review): always_inline on a bare prototype can only be honored where the function body is visible to the compiler - confirm how callers pull in the definition.
diff --git a/src/Specific/solinas32_2e171m19/femul.c b/src/Specific/solinas32_2e171m19/femul.c
new file mode 100644
index 000000000..2fcdf2aad
--- /dev/null
+++ b/src/Specific/solinas32_2e171m19/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)  // Multiplies two 9-limb operands (x18..x5 and x34..x21, lowest limbs x5 and x21 listed last) and writes a 9-limb result to out; presumably arithmetic mod 2^171 - 19 per the directory name - TODO confirm.
+{ uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21)))))))));  // x36..x44: 64-bit column sums of limb cross products; x36 is the highest column, x44 the lowest
+{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x13 * ((uint64_t)x18 * x34)));  // here 0x13 is a multiplier (19), not the shift count used further down; presumably the fold factor for products past the top column - verify
+{ uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x13 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
+{ uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x13 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
+{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x13 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+{ uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x13 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
+{ uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x13 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
+{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x13 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x13 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23))))))))));  // lowest column: x5*x21 plus 0x13 times every remaining product
+{ uint32_t x45 = (uint32_t) (x44 >> 0x13);  // carry chain starts at the lowest column; every column keeps 19 bits; the cast assumes the carry fits in 32 bits (depends on input bounds not visible here)
+{ uint32_t x46 = ((uint32_t)x44 & 0x7ffff);  // low 19 bits of the lowest column
+{ uint64_t x47 = (x45 + x43);  // carry added into the next column
+{ uint32_t x48 = (uint32_t) (x47 >> 0x13);
+{ uint32_t x49 = ((uint32_t)x47 & 0x7ffff);
+{ uint64_t x50 = (x48 + x42);
+{ uint32_t x51 = (uint32_t) (x50 >> 0x13);
+{ uint32_t x52 = ((uint32_t)x50 & 0x7ffff);
+{ uint64_t x53 = (x51 + x41);
+{ uint32_t x54 = (uint32_t) (x53 >> 0x13);
+{ uint32_t x55 = ((uint32_t)x53 & 0x7ffff);
+{ uint64_t x56 = (x54 + x40);
+{ uint32_t x57 = (uint32_t) (x56 >> 0x13);
+{ uint32_t x58 = ((uint32_t)x56 & 0x7ffff);
+{ uint64_t x59 = (x57 + x39);
+{ uint32_t x60 = (uint32_t) (x59 >> 0x13);
+{ uint32_t x61 = ((uint32_t)x59 & 0x7ffff);
+{ uint64_t x62 = (x60 + x38);
+{ uint32_t x63 = (uint32_t) (x62 >> 0x13);
+{ uint32_t x64 = ((uint32_t)x62 & 0x7ffff);
+{ uint64_t x65 = (x63 + x37);
+{ uint32_t x66 = (uint32_t) (x65 >> 0x13);
+{ uint32_t x67 = ((uint32_t)x65 & 0x7ffff);
+{ uint64_t x68 = (x66 + x36);
+{ uint32_t x69 = (uint32_t) (x68 >> 0x13);  // carry out of the highest column
+{ uint32_t x70 = ((uint32_t)x68 & 0x7ffff);
+{ uint32_t x71 = (x46 + (0x13 * x69));  // top carry re-enters the lowest limb scaled by 0x13 (multiplier)
+{ uint32_t x72 = (x71 >> 0x13);  // second, short carry pass over the two lowest limbs
+{ uint32_t x73 = (x71 & 0x7ffff);
+{ uint32_t x74 = (x72 + x49);
+{ uint32_t x75 = (x74 >> 0x13);
+{ uint32_t x76 = (x74 & 0x7ffff);
+out[0] = x70;  // out[0] comes from the highest column, out[8] from the lowest
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52;  // final carry is added without re-masking, so this limb may exceed 19 bits
+out[7] = x76;
+out[8] = x73;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9]; all nine entries out[0..8] are written.
diff --git a/src/Specific/solinas32_2e171m19/femul.h b/src/Specific/solinas32_2e171m19/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas32_2e171m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21);  // Prototype: two groups of nine limb values passed by value, result written through out. NOTE(review): always_inline on a bare prototype can only be honored where the function body is visible to the compiler - confirm how callers pull in the definition.
diff --git a/src/Specific/solinas32_2e171m19/fesquare.c b/src/Specific/solinas32_2e171m19/fesquare.c
new file mode 100644
index 000000000..02e4e804e
--- /dev/null
+++ b/src/Specific/solinas32_2e171m19/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // Squares one 9-limb operand (x15..x2, lowest limb x2 listed last) and writes a 9-limb result to out; presumably arithmetic mod 2^171 - 19 per the directory name - TODO confirm.
+{ uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2)))))))));  // x17..x25: 64-bit column sums (x17 highest, x25 lowest); symmetric products such as x4*x16 and x16*x4 are both spelled out rather than merged
+{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x13 * ((uint64_t)x15 * x15)));  // here 0x13 is a multiplier (19), not the shift count used further down; presumably the fold factor for products past the top column - verify
+{ uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
+{ uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
+{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x13 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+{ uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
+{ uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x13 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4))))))))));  // lowest column: x2*x2 plus 0x13 times every remaining product
+{ uint32_t x26 = (uint32_t) (x25 >> 0x13);  // carry chain starts at the lowest column; every column keeps 19 bits; the cast assumes the carry fits in 32 bits (depends on input bounds not visible here)
+{ uint32_t x27 = ((uint32_t)x25 & 0x7ffff);  // low 19 bits of the lowest column
+{ uint64_t x28 = (x26 + x24);  // carry added into the next column
+{ uint32_t x29 = (uint32_t) (x28 >> 0x13);
+{ uint32_t x30 = ((uint32_t)x28 & 0x7ffff);
+{ uint64_t x31 = (x29 + x23);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x13);
+{ uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
+{ uint64_t x34 = (x32 + x22);
+{ uint32_t x35 = (uint32_t) (x34 >> 0x13);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
+{ uint64_t x37 = (x35 + x21);
+{ uint32_t x38 = (uint32_t) (x37 >> 0x13);
+{ uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
+{ uint64_t x40 = (x38 + x20);
+{ uint32_t x41 = (uint32_t) (x40 >> 0x13);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
+{ uint64_t x43 = (x41 + x19);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x13);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
+{ uint64_t x46 = (x44 + x18);
+{ uint32_t x47 = (uint32_t) (x46 >> 0x13);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
+{ uint64_t x49 = (x47 + x17);
+{ uint32_t x50 = (uint32_t) (x49 >> 0x13);  // carry out of the highest column
+{ uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
+{ uint32_t x52 = (x27 + (0x13 * x50));  // top carry re-enters the lowest limb scaled by 0x13 (multiplier)
+{ uint32_t x53 = (x52 >> 0x13);  // second, short carry pass over the two lowest limbs
+{ uint32_t x54 = (x52 & 0x7ffff);
+{ uint32_t x55 = (x53 + x30);
+{ uint32_t x56 = (x55 >> 0x13);
+{ uint32_t x57 = (x55 & 0x7ffff);
+out[0] = x51;  // out[0] comes from the highest column, out[8] from the lowest
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33;  // final carry is added without re-masking, so this limb may exceed 19 bits
+out[7] = x57;
+out[8] = x54;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9]; all nine entries out[0..8] are written.
diff --git a/src/Specific/solinas32_2e171m19/fesquare.h b/src/Specific/solinas32_2e171m19/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas32_2e171m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype: nine limb values passed by value, result written through out. NOTE(review): always_inline on a bare prototype can only be honored where the function body is visible to the compiler - confirm how callers pull in the definition.
diff --git a/src/Specific/solinas32_2e171m19/freeze.c b/src/Specific/solinas32_2e171m19/freeze.c
new file mode 100644
index 000000000..433ddce0e
--- /dev/null
+++ b/src/Specific/solinas32_2e171m19/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): no opening '{' follows this signature and the statements below are not valid C - this looks like unfinished code-generator output and must be regenerated before it can compile.
+out[0] = uint32_t x18;  // a declaration used as an expression; x18 is never defined
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 19 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;  // raw, unprinted generator term (presumably a 19-bit subtract-with-borrow - TODO confirm)
+out[2] = x2;
+out[3] = 0x7ffed;;  // 0x7ffed == 2^19 - 19; presumably the low limb of the modulus 2^171 - 19 (per the directory name) - verify
+}
+// caller: uint64_t out[4]; NOTE(review): only four outputs for nine input limbs - likely another symptom of the truncated generator output.
diff --git a/src/Specific/solinas32_2e171m19/freeze.h b/src/Specific/solinas32_2e171m19/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas32_2e171m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype: nine limb values passed by value, result written through out. NOTE(review): always_inline on a bare prototype can only be honored where the function body is visible to the compiler - confirm how callers pull in the definition.
diff --git a/src/Specific/solinas32_2e174m17/femul.c b/src/Specific/solinas32_2e174m17/femul.c
new file mode 100644
index 000000000..f8629dead
--- /dev/null
+++ b/src/Specific/solinas32_2e174m17/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)  // Multiplies two 9-limb operands (x18..x5 and x34..x21, lowest limbs x5 and x21 listed last) and writes a 9-limb result to out; presumably arithmetic mod 2^174 - 17 per the directory name - TODO confirm.
+{ uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21)))))))));  // x36..x44: 64-bit column sums of limb cross products (x36 highest, x44 lowest); the 0x2 factors presumably compensate for the mixed 20/19-bit limb widths seen in the carry chain - verify
+{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x11 * ((uint64_t)x18 * x34)));  // the 0x11-scaled group is presumably the products that wrapped past the top column (2^174 == 17 mod p) - verify
+{ uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0x11 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))));
+{ uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0x11 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))));
+{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x11 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+{ uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0x11 * ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))));
+{ uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0x11 * (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))));
+{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x11 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x11 * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));  // lowest column: x5*x21 plus 0x11 times the remaining products
+{ uint32_t x45 = (uint32_t) (x44 >> 0x14);  // carry chain starts at the lowest column; column widths from lowest to highest are 20,19,19,20,19,19,20,19,19 bits; the cast assumes the carry fits in 32 bits (depends on input bounds not visible here)
+{ uint32_t x46 = ((uint32_t)x44 & 0xfffff);  // low 20 bits of the lowest column
+{ uint64_t x47 = (x45 + x43);  // carry added into the next column
+{ uint32_t x48 = (uint32_t) (x47 >> 0x13);
+{ uint32_t x49 = ((uint32_t)x47 & 0x7ffff);
+{ uint64_t x50 = (x48 + x42);
+{ uint32_t x51 = (uint32_t) (x50 >> 0x13);
+{ uint32_t x52 = ((uint32_t)x50 & 0x7ffff);
+{ uint64_t x53 = (x51 + x41);
+{ uint32_t x54 = (uint32_t) (x53 >> 0x14);  // 20-bit column
+{ uint32_t x55 = ((uint32_t)x53 & 0xfffff);
+{ uint64_t x56 = (x54 + x40);
+{ uint32_t x57 = (uint32_t) (x56 >> 0x13);
+{ uint32_t x58 = ((uint32_t)x56 & 0x7ffff);
+{ uint64_t x59 = (x57 + x39);
+{ uint32_t x60 = (uint32_t) (x59 >> 0x13);
+{ uint32_t x61 = ((uint32_t)x59 & 0x7ffff);
+{ uint64_t x62 = (x60 + x38);
+{ uint32_t x63 = (uint32_t) (x62 >> 0x14);  // 20-bit column
+{ uint32_t x64 = ((uint32_t)x62 & 0xfffff);
+{ uint64_t x65 = (x63 + x37);
+{ uint32_t x66 = (uint32_t) (x65 >> 0x13);
+{ uint32_t x67 = ((uint32_t)x65 & 0x7ffff);
+{ uint64_t x68 = (x66 + x36);
+{ uint32_t x69 = (uint32_t) (x68 >> 0x13);  // carry out of the highest column
+{ uint32_t x70 = ((uint32_t)x68 & 0x7ffff);
+{ uint32_t x71 = (x46 + (0x11 * x69));  // top carry re-enters the lowest limb scaled by 0x11
+{ uint32_t x72 = (x71 >> 0x14);  // second, short carry pass over the two lowest limbs
+{ uint32_t x73 = (x71 & 0xfffff);
+{ uint32_t x74 = (x72 + x49);
+{ uint32_t x75 = (x74 >> 0x13);
+{ uint32_t x76 = (x74 & 0x7ffff);
+out[0] = x70;  // out[0] comes from the highest column, out[8] from the lowest
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52;  // final carry is added without re-masking, so this limb may exceed 19 bits
+out[7] = x76;
+out[8] = x73;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9]; all nine entries out[0..8] are written.
diff --git a/src/Specific/solinas32_2e174m17/femul.h b/src/Specific/solinas32_2e174m17/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas32_2e174m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: multiplies two 9-limb operands (x18..x5 and x34..x21, highest limb first; x5 and x21 are the lowest limbs) and stores 9 result limbs in out. NOTE(review): always_inline on a bare prototype -- confirm callers can see the body.
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21);
diff --git a/src/Specific/solinas32_2e174m17/fesquare.c b/src/Specific/solinas32_2e174m17/fesquare.c
new file mode 100644
index 000000000..1e1c5913c
--- /dev/null
+++ b/src/Specific/solinas32_2e174m17/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 9-limb input (x2 = lowest limb ... x15 = highest), weighting wrapped products by 0x11 (= 17, matching 2^174 - 17 in this directory's name), and writes the carried limbs to out[0..8], highest first.
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2))))))))); // column 8 (highest): products whose limb indices sum to 8, nothing wrapped
+{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * ((uint64_t)x15 * x15))); // columns 7..1: direct products plus 0x11 times the products that wrapped past the top limb
+{ uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x11 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16))))); // NOTE(review): the 0x2 factors presumably compensate for the mixed 20/19-bit limb widths -- confirm against the generator
+{ uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))));
+{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+{ uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x11 * ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))));
+{ uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x11 * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4))))))))))); // column 0 (lowest)
+{ uint32_t x26 = (uint32_t) (x25 >> 0x14); // carry out of column 0, which keeps 20 bits; assumes the carry fits in 32 bits -- TODO confirm input bounds
+{ uint32_t x27 = ((uint32_t)x25 & 0xfffff);
+{ uint64_t x28 = (x26 + x24);
+{ uint32_t x29 = (uint32_t) (x28 >> 0x13); // column 1 keeps 19 bits
+{ uint32_t x30 = ((uint32_t)x28 & 0x7ffff);
+{ uint64_t x31 = (x29 + x23);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x13);
+{ uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
+{ uint64_t x34 = (x32 + x22);
+{ uint32_t x35 = (uint32_t) (x34 >> 0x14);
+{ uint32_t x36 = ((uint32_t)x34 & 0xfffff);
+{ uint64_t x37 = (x35 + x21);
+{ uint32_t x38 = (uint32_t) (x37 >> 0x13);
+{ uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
+{ uint64_t x40 = (x38 + x20);
+{ uint32_t x41 = (uint32_t) (x40 >> 0x13);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
+{ uint64_t x43 = (x41 + x19);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x14);
+{ uint32_t x45 = ((uint32_t)x43 & 0xfffff);
+{ uint64_t x46 = (x44 + x18);
+{ uint32_t x47 = (uint32_t) (x46 >> 0x13);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
+{ uint64_t x49 = (x47 + x17);
+{ uint32_t x50 = (uint32_t) (x49 >> 0x13); // carry out of the highest column
+{ uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
+{ uint32_t x52 = (x27 + (0x11 * x50)); // that carry re-enters column 0 with weight 0x11
+{ uint32_t x53 = (x52 >> 0x14);
+{ uint32_t x54 = (x52 & 0xfffff);
+{ uint32_t x55 = (x53 + x30);
+{ uint32_t x56 = (x55 >> 0x13);
+{ uint32_t x57 = (x55 & 0x7ffff);
+out[0] = x51; // highest limb
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33; // last carry is added here without re-masking
+out[7] = x57;
+out[8] = x54; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];  (the caller supplies nine uint64_t slots; all nine are written above)
diff --git a/src/Specific/solinas32_2e174m17/fesquare.h b/src/Specific/solinas32_2e174m17/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas32_2e174m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 9-limb value passed highest limb first (x2 is the lowest limb) and stores 9 result limbs in out. NOTE(review): always_inline on a bare prototype -- confirm callers can see the body.
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e174m17/freeze.c b/src/Specific/solinas32_2e174m17/freeze.c
new file mode 100644
index 000000000..2db75709d
--- /dev/null
+++ b/src/Specific/solinas32_2e174m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): freeze -- the body below is not valid C (no opening brace, type names inside expressions, raw "Op Syntax.SubWithGetBorrow" text); it looks like an unfinished generator dump rather than an implementation -- confirm before building.
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x18; // a declaration used as a value: does not compile
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 20 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // raw operator term, not C
+out[2] = x2;
+out[3] = 0xfffef;; // 0xfffef == 2^20 - 17
+}
+// caller: uint64_t out[4];  (NOTE(review): the function takes 9 limbs, so 4 output slots is presumably part of the same broken dump -- confirm)
diff --git a/src/Specific/solinas32_2e174m17/freeze.h b/src/Specific/solinas32_2e174m17/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas32_2e174m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// freeze: declaration taking 9 limbs and an output pointer. NOTE(review): the freeze.c body added alongside this header is not valid C, so there is no usable definition behind this prototype yet.
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e174m3/femul.c b/src/Specific/solinas32_2e174m3/femul.c
new file mode 100644
index 000000000..509725888
--- /dev/null
+++ b/src/Specific/solinas32_2e174m3/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: 6-limb by 6-limb product (x5 and x15 are the lowest limbs, x12 and x22 the highest), 29 bits kept per limb, wrapped products weighted by 0x3 to match 2^174 - 3 in this directory's name; writes out[0..5], highest limb first.
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)
+{ ℤ x24 = (((uint64_t)x5 * x22) +ℤ (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + (((uint64_t)x13 * x17) + ((uint64_t)x12 * x15)))))); // NOTE(review): "ℤ", "+ℤ" and "*ℤ" are not C; presumably the generator's marker for values it could not fit in a fixed-width word -- this file will not compile as written
+{ ℤ x25 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + (((uint64_t)x11 * x17) + ((uint64_t)x13 * x15))))) +ℤ (0x3 * ((uint64_t)x12 * x22)));
+{ ℤ x26 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + (((uint64_t)x9 * x17) + ((uint64_t)x11 * x15)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x22) + ((uint64_t)x12 * x23))));
+{ ℤ x27 = ((((uint64_t)x5 * x19) + (((uint64_t)x7 * x17) + ((uint64_t)x9 * x15))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x22) + (((uint64_t)x13 * x23) + ((uint64_t)x12 * x21)))));
+{ ℤ x28 = ((((uint64_t)x5 * x17) + ((uint64_t)x7 * x15)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x22) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x12 * x19))))));
+{ ℤ x29 = (((uint64_t)x5 * x15) +ℤ (0x3 *ℤ (((uint64_t)x7 * x22) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x12 * x17))))))); // column 0 (lowest)
+{ uint64_t x30 = (x29 >> 0x1d); // carry out of column 0; every column keeps 29 bits
+{ uint32_t x31 = (x29 & 0x1fffffff);
+{ ℤ x32 = (x30 +ℤ x28);
+{ uint64_t x33 = (x32 >> 0x1d);
+{ uint32_t x34 = (x32 & 0x1fffffff);
+{ ℤ x35 = (x33 +ℤ x27);
+{ uint64_t x36 = (x35 >> 0x1d);
+{ uint32_t x37 = (x35 & 0x1fffffff);
+{ ℤ x38 = (x36 +ℤ x26);
+{ uint64_t x39 = (x38 >> 0x1d);
+{ uint32_t x40 = (x38 & 0x1fffffff);
+{ ℤ x41 = (x39 +ℤ x25);
+{ uint64_t x42 = (x41 >> 0x1d);
+{ uint32_t x43 = (x41 & 0x1fffffff);
+{ ℤ x44 = (x42 +ℤ x24);
+{ uint64_t x45 = (x44 >> 0x1d); // carry out of the highest column
+{ uint32_t x46 = (x44 & 0x1fffffff);
+{ uint64_t x47 = (x31 + (0x3 * x45)); // that carry re-enters column 0 with weight 0x3
+{ uint32_t x48 = (uint32_t) (x47 >> 0x1d);
+{ uint32_t x49 = ((uint32_t)x47 & 0x1fffffff);
+{ uint32_t x50 = (x48 + x34);
+{ uint32_t x51 = (x50 >> 0x1d);
+{ uint32_t x52 = (x50 & 0x1fffffff);
+out[0] = x46; // highest limb
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37; // last carry is added here without re-masking
+out[4] = x52;
+out[5] = x49; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];  (the caller supplies six uint64_t slots; all six are written above)
diff --git a/src/Specific/solinas32_2e174m3/femul.h b/src/Specific/solinas32_2e174m3/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas32_2e174m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: multiplies two 6-limb operands (x12..x5 and x22..x15, highest limb first) into out[0..5]. NOTE(review): the femul.c body added alongside this header uses non-C "ℤ" pseudo-types -- confirm it is meant to be compiled.
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);
diff --git a/src/Specific/solinas32_2e174m3/fesquare.c b/src/Specific/solinas32_2e174m3/fesquare.c
new file mode 100644
index 000000000..6d8132a1a
--- /dev/null
+++ b/src/Specific/solinas32_2e174m3/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 6-limb input (x2 = lowest limb, x9 = highest), 29 bits kept per limb, wrapped products weighted by 0x3 to match 2^174 - 3 in this directory's name; writes out[0..5], highest limb first.
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ ℤ x11 = (((uint64_t)x2 * x9) +ℤ (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x9 * x2)))))); // NOTE(review): "ℤ", "+ℤ" and "*ℤ" are not C; presumably the generator's marker for values it could not fit in a fixed-width word -- this file will not compile as written
+{ ℤ x12 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ (0x3 * ((uint64_t)x9 * x9)));
+{ ℤ x13 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x9) + ((uint64_t)x9 * x10))));
+{ ℤ x14 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x9) + (((uint64_t)x10 * x10) + ((uint64_t)x9 * x8)))));
+{ ℤ x15 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x9) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((uint64_t)x9 * x6))))));
+{ ℤ x16 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x9) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((uint64_t)x9 * x4))))))); // column 0 (lowest)
+{ uint64_t x17 = (x16 >> 0x1d); // carry out of column 0; every column keeps 29 bits
+{ uint32_t x18 = (x16 & 0x1fffffff);
+{ ℤ x19 = (x17 +ℤ x15);
+{ uint64_t x20 = (x19 >> 0x1d);
+{ uint32_t x21 = (x19 & 0x1fffffff);
+{ ℤ x22 = (x20 +ℤ x14);
+{ uint64_t x23 = (x22 >> 0x1d);
+{ uint32_t x24 = (x22 & 0x1fffffff);
+{ ℤ x25 = (x23 +ℤ x13);
+{ uint64_t x26 = (x25 >> 0x1d);
+{ uint32_t x27 = (x25 & 0x1fffffff);
+{ ℤ x28 = (x26 +ℤ x12);
+{ uint64_t x29 = (x28 >> 0x1d);
+{ uint32_t x30 = (x28 & 0x1fffffff);
+{ ℤ x31 = (x29 +ℤ x11);
+{ uint64_t x32 = (x31 >> 0x1d); // carry out of the highest column
+{ uint32_t x33 = (x31 & 0x1fffffff);
+{ uint64_t x34 = (x18 + (0x3 * x32)); // that carry re-enters column 0 with weight 0x3
+{ uint32_t x35 = (uint32_t) (x34 >> 0x1d);
+{ uint32_t x36 = ((uint32_t)x34 & 0x1fffffff);
+{ uint32_t x37 = (x35 + x21);
+{ uint32_t x38 = (x37 >> 0x1d);
+{ uint32_t x39 = (x37 & 0x1fffffff);
+out[0] = x33; // highest limb
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // last carry is added here without re-masking
+out[4] = x39;
+out[5] = x36; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];  (the caller supplies six uint64_t slots; all six are written above)
diff --git a/src/Specific/solinas32_2e174m3/fesquare.h b/src/Specific/solinas32_2e174m3/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas32_2e174m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 6-limb value passed highest limb first (x2 is the lowest limb) into out[0..5]. NOTE(review): the fesquare.c body added alongside this header uses non-C "ℤ" pseudo-types -- confirm it is meant to be compiled.
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e174m3/freeze.c b/src/Specific/solinas32_2e174m3/freeze.c
new file mode 100644
index 000000000..4dd93cd80
--- /dev/null
+++ b/src/Specific/solinas32_2e174m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): freeze -- the body below is not valid C (no opening brace, type names inside expressions, raw "Op Syntax.SubWithGetBorrow" text); it looks like an unfinished generator dump rather than an implementation -- confirm before building.
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x12; // a declaration used as a value: does not compile
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 29 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // raw operator term, not C
+out[2] = x2;
+out[3] = 0x1ffffffd;; // 0x1ffffffd == 2^29 - 3
+}
+// caller: uint64_t out[4];  (NOTE(review): the function takes 6 limbs, so 4 output slots is presumably part of the same broken dump -- confirm)
diff --git a/src/Specific/solinas32_2e174m3/freeze.h b/src/Specific/solinas32_2e174m3/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas32_2e174m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// freeze: declaration taking 6 limbs and an output pointer. NOTE(review): the freeze.c body added alongside this header is not valid C, so there is no usable definition behind this prototype yet.
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e189m25/femul.c b/src/Specific/solinas32_2e189m25/femul.c
new file mode 100644
index 000000000..498964086
--- /dev/null
+++ b/src/Specific/solinas32_2e189m25/femul.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: 7-limb by 7-limb product (x5 and x17 are the lowest limbs, x14 and x26 the highest), 27 bits kept per limb, wrapped products weighted by 0x19 (= 25) to match 2^189 - 25 in this directory's name; writes out[0..6], highest limb first.
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
+{ uint64_t x28 = (((uint64_t)x5 * x26) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + (((uint64_t)x15 * x19) + ((uint64_t)x14 * x17))))))); // column 6 (highest): products whose limb indices sum to 6, nothing wrapped
+{ uint64_t x29 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x15 * x17)))))) + (0x19 * ((uint64_t)x14 * x26)));
+{ uint64_t x30 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((uint64_t)x13 * x17))))) + (0x19 * (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27))));
+{ uint64_t x31 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + ((uint64_t)x11 * x17)))) + (0x19 * (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
+{ ℤ x32 = ((((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + ((uint64_t)x9 * x17))) +ℤ (0x19 *ℤ (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23)))))); // NOTE(review): "ℤ", "+ℤ" and "*ℤ" are not C; presumably the generator's marker for values it could not fit in a fixed-width word -- this file will not compile as written
+{ ℤ x33 = ((((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) +ℤ (0x19 *ℤ (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
+{ ℤ x34 = (((uint64_t)x5 * x17) +ℤ (0x19 *ℤ (((uint64_t)x7 * x26) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x14 * x19)))))))); // column 0 (lowest)
+{ uint64_t x35 = (x34 >> 0x1b); // carry out of column 0; every column keeps 27 bits
+{ uint32_t x36 = (x34 & 0x7ffffff);
+{ ℤ x37 = (x35 +ℤ x33);
+{ uint64_t x38 = (x37 >> 0x1b);
+{ uint32_t x39 = (x37 & 0x7ffffff);
+{ ℤ x40 = (x38 +ℤ x32);
+{ uint64_t x41 = (x40 >> 0x1b);
+{ uint32_t x42 = (x40 & 0x7ffffff);
+{ uint64_t x43 = (x41 + x31);
+{ uint64_t x44 = (x43 >> 0x1b);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
+{ uint64_t x46 = (x44 + x30);
+{ uint64_t x47 = (x46 >> 0x1b);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
+{ uint64_t x49 = (x47 + x29);
+{ uint64_t x50 = (x49 >> 0x1b);
+{ uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint64_t x53 = (x52 >> 0x1b); // carry out of the highest column
+{ uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
+{ uint64_t x55 = (x36 + (0x19 * x53)); // that carry re-enters column 0 with weight 0x19
+{ uint32_t x56 = (uint32_t) (x55 >> 0x1b);
+{ uint32_t x57 = ((uint32_t)x55 & 0x7ffffff);
+{ uint32_t x58 = (x56 + x39);
+{ uint32_t x59 = (x58 >> 0x1b);
+{ uint32_t x60 = (x58 & 0x7ffffff);
+out[0] = x54; // highest limb
+out[1] = x51;
+out[2] = x48;
+out[3] = x45;
+out[4] = x59 + x42; // last carry is added here without re-masking
+out[5] = x60;
+out[6] = x57; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];  (the caller supplies seven uint64_t slots; all seven are written above)
diff --git a/src/Specific/solinas32_2e189m25/femul.h b/src/Specific/solinas32_2e189m25/femul.h
new file mode 100644
index 000000000..ad4e84953
--- /dev/null
+++ b/src/Specific/solinas32_2e189m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: multiplies two 7-limb operands (x14..x5 and x26..x17, highest limb first) into out[0..6]. NOTE(review): the femul.c body added alongside this header uses non-C "ℤ" pseudo-types -- confirm it is meant to be compiled.
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17);
diff --git a/src/Specific/solinas32_2e189m25/fesquare.c b/src/Specific/solinas32_2e189m25/fesquare.c
new file mode 100644
index 000000000..c3ba4208b
--- /dev/null
+++ b/src/Specific/solinas32_2e189m25/fesquare.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 7-limb input (x2 = lowest limb, x11 = highest), 27 bits kept per limb, wrapped products weighted by 0x19 (= 25) to match 2^189 - 25 in this directory's name; writes out[0..6], highest limb first.
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x13 = (((uint64_t)x2 * x11) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x11 * x2))))))); // column 6 (highest): products whose limb indices sum to 6, nothing wrapped
+{ uint64_t x14 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x19 * ((uint64_t)x11 * x11)));
+{ uint64_t x15 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12))));
+{ uint64_t x16 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
+{ ℤ x17 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x19 *ℤ (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8)))))); // NOTE(review): "ℤ", "+ℤ" and "*ℤ" are not C; presumably the generator's marker for values it could not fit in a fixed-width word -- this file will not compile as written
+{ ℤ x18 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x19 *ℤ (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
+{ ℤ x19 = (((uint64_t)x2 * x2) +ℤ (0x19 *ℤ (((uint64_t)x4 * x11) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x11 * x4)))))))); // column 0 (lowest)
+{ uint64_t x20 = (x19 >> 0x1b); // carry out of column 0; every column keeps 27 bits
+{ uint32_t x21 = (x19 & 0x7ffffff);
+{ ℤ x22 = (x20 +ℤ x18);
+{ uint64_t x23 = (x22 >> 0x1b);
+{ uint32_t x24 = (x22 & 0x7ffffff);
+{ ℤ x25 = (x23 +ℤ x17);
+{ uint64_t x26 = (x25 >> 0x1b);
+{ uint32_t x27 = (x25 & 0x7ffffff);
+{ uint64_t x28 = (x26 + x16);
+{ uint64_t x29 = (x28 >> 0x1b);
+{ uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
+{ uint64_t x31 = (x29 + x15);
+{ uint64_t x32 = (x31 >> 0x1b);
+{ uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
+{ uint64_t x34 = (x32 + x14);
+{ uint64_t x35 = (x34 >> 0x1b);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+{ uint64_t x37 = (x35 + x13);
+{ uint64_t x38 = (x37 >> 0x1b); // carry out of the highest column
+{ uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
+{ uint64_t x40 = (x21 + (0x19 * x38)); // that carry re-enters column 0 with weight 0x19
+{ uint32_t x41 = (uint32_t) (x40 >> 0x1b);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
+{ uint32_t x43 = (x41 + x24);
+{ uint32_t x44 = (x43 >> 0x1b);
+{ uint32_t x45 = (x43 & 0x7ffffff);
+out[0] = x39; // highest limb
+out[1] = x36;
+out[2] = x33;
+out[3] = x30;
+out[4] = x44 + x27; // last carry is added here without re-masking
+out[5] = x45;
+out[6] = x42; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];  (the caller supplies seven uint64_t slots; all seven are written above)
diff --git a/src/Specific/solinas32_2e189m25/fesquare.h b/src/Specific/solinas32_2e189m25/fesquare.h
new file mode 100644
index 000000000..fef33c926
--- /dev/null
+++ b/src/Specific/solinas32_2e189m25/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 7-limb value passed highest limb first (x2 is the lowest limb) into out[0..6]. NOTE(review): the fesquare.c body added alongside this header uses non-C "ℤ" pseudo-types -- confirm it is meant to be compiled.
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e189m25/freeze.c b/src/Specific/solinas32_2e189m25/freeze.c
new file mode 100644
index 000000000..8314f828c
--- /dev/null
+++ b/src/Specific/solinas32_2e189m25/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): freeze -- the body below is not valid C (no opening brace, type names inside expressions, raw "Op Syntax.SubWithGetBorrow" text); it looks like an unfinished generator dump rather than an implementation -- confirm before building.
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x14; // a declaration used as a value: does not compile
+out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // raw operator term, not C
+out[2] = x2;
+out[3] = 0x7ffffe7;; // 0x7ffffe7 == 2^27 - 25
+}
+// caller: uint64_t out[4];  (NOTE(review): the function takes 7 limbs, so 4 output slots is presumably part of the same broken dump -- confirm)
diff --git a/src/Specific/solinas32_2e189m25/freeze.h b/src/Specific/solinas32_2e189m25/freeze.h
new file mode 100644
index 000000000..b2c28ccf1
--- /dev/null
+++ b/src/Specific/solinas32_2e189m25/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// freeze: declaration taking 7 limbs and an output pointer. NOTE(review): the freeze.c body added alongside this header is not valid C, so there is no usable definition behind this prototype yet.
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e190m11/femul.c b/src/Specific/solinas32_2e190m11/femul.c
new file mode 100644
index 000000000..8b6631bbf
--- /dev/null
+++ b/src/Specific/solinas32_2e190m11/femul.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: 7-limb by 7-limb product (x5 and x17 are the lowest limbs, x14 and x26 the highest); the lowest limb keeps 28 bits and the rest 27; wrapped products are weighted by 0xb (= 11) to match 2^190 - 11 in this directory's name; writes out[0..6], highest limb first.
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
+{ uint64_t x28 = (((uint64_t)x5 * x26) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((0x2 * ((uint64_t)x13 * x21)) + ((0x2 * ((uint64_t)x15 * x19)) + ((uint64_t)x14 * x17))))))); // column 6 (highest); NOTE(review): the 0x2 factors presumably compensate for the mixed 28/27-bit limb widths -- confirm against the generator
+{ uint64_t x29 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((0x2 * ((uint64_t)x13 * x19)) + ((uint64_t)x15 * x17)))))) + (0xb * ((uint64_t)x14 * x26)));
+{ uint64_t x30 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((0x2 * ((uint64_t)x11 * x19)) + ((uint64_t)x13 * x17))))) + (0xb * (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27))));
+{ uint64_t x31 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((0x2 * ((uint64_t)x9 * x19)) + ((uint64_t)x11 * x17)))) + (0xb * (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
+{ uint64_t x32 = ((((uint64_t)x5 * x21) + ((0x2 * ((uint64_t)x7 * x19)) + ((uint64_t)x9 * x17))) + (0xb * (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23))))));
+{ uint64_t x33 = ((((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) + (0xb * (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
+{ ℤ x34 = (((uint64_t)x5 * x17) +ℤ (0xb *ℤ ((0x2 * ((uint64_t)x7 * x26)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((0x2 * ((uint64_t)x15 * x21)) + (0x2 * ((uint64_t)x14 * x19))))))))); // column 0 (lowest); NOTE(review): "ℤ", "+ℤ" and "*ℤ" are not C; presumably the generator's marker for a value it could not fit in a fixed-width word -- this file will not compile as written
+{ uint64_t x35 = (x34 >> 0x1c); // carry out of column 0, which keeps 28 bits
+{ uint32_t x36 = (x34 & 0xfffffff);
+{ uint64_t x37 = (x35 + x33);
+{ uint64_t x38 = (x37 >> 0x1b); // columns 1..6 keep 27 bits each
+{ uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
+{ uint64_t x40 = (x38 + x32);
+{ uint64_t x41 = (x40 >> 0x1b);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
+{ uint64_t x43 = (x41 + x31);
+{ uint64_t x44 = (x43 >> 0x1b);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
+{ uint64_t x46 = (x44 + x30);
+{ uint64_t x47 = (x46 >> 0x1b);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
+{ uint64_t x49 = (x47 + x29);
+{ uint64_t x50 = (x49 >> 0x1b);
+{ uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint64_t x53 = (x52 >> 0x1b); // carry out of the highest column
+{ uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
+{ uint64_t x55 = (x36 + (0xb * x53)); // that carry re-enters column 0 with weight 0xb
+{ uint32_t x56 = (uint32_t) (x55 >> 0x1c);
+{ uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
+{ uint32_t x58 = (x56 + x39);
+{ uint32_t x59 = (x58 >> 0x1b);
+{ uint32_t x60 = (x58 & 0x7ffffff);
+out[0] = x54; // highest limb
+out[1] = x51;
+out[2] = x48;
+out[3] = x45;
+out[4] = x59 + x42; // last carry is added here without re-masking
+out[5] = x60;
+out[6] = x57; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];  (the caller supplies seven uint64_t slots; all seven are written above)
diff --git a/src/Specific/solinas32_2e190m11/femul.h b/src/Specific/solinas32_2e190m11/femul.h
new file mode 100644
index 000000000..ad4e84953
--- /dev/null
+++ b/src/Specific/solinas32_2e190m11/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: multiplies two 7-limb operands (x14..x5 and x26..x17, highest limb first) into out[0..6]. NOTE(review): the femul.c body added alongside this header declares one intermediate with the non-C "ℤ" pseudo-type -- confirm it is meant to be compiled.
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17);
diff --git a/src/Specific/solinas32_2e190m11/fesquare.c b/src/Specific/solinas32_2e190m11/fesquare.c
new file mode 100644
index 000000000..3974e00c4
--- /dev/null
+++ b/src/Specific/solinas32_2e190m11/fesquare.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 7-limb input (x2 = lowest limb, x11 = highest); the lowest limb keeps 28 bits and the rest 27; wrapped products are weighted by 0xb (= 11) to match 2^190 - 11 in this directory's name; writes out[0..6], highest limb first.
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x13 = (((uint64_t)x2 * x11) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x11 * x2))))))); // column 6 (highest); NOTE(review): the 0x2 factors presumably compensate for the mixed 28/27-bit limb widths -- confirm against the generator
+{ uint64_t x14 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xb * ((uint64_t)x11 * x11)));
+{ uint64_t x15 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xb * (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12))));
+{ uint64_t x16 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xb * (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
+{ uint64_t x17 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xb * (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8))))));
+{ uint64_t x18 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xb * (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
+{ ℤ x19 = (((uint64_t)x2 * x2) +ℤ (0xb *ℤ ((0x2 * ((uint64_t)x4 * x11)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + (0x2 * ((uint64_t)x11 * x4))))))))); // column 0 (lowest); NOTE(review): "ℤ", "+ℤ" and "*ℤ" are not C; presumably the generator's marker for a value it could not fit in a fixed-width word -- this file will not compile as written
+{ uint64_t x20 = (x19 >> 0x1c); // carry out of column 0, which keeps 28 bits
+{ uint32_t x21 = (x19 & 0xfffffff);
+{ uint64_t x22 = (x20 + x18);
+{ uint64_t x23 = (x22 >> 0x1b); // columns 1..6 keep 27 bits each
+{ uint32_t x24 = ((uint32_t)x22 & 0x7ffffff);
+{ uint64_t x25 = (x23 + x17);
+{ uint64_t x26 = (x25 >> 0x1b);
+{ uint32_t x27 = ((uint32_t)x25 & 0x7ffffff);
+{ uint64_t x28 = (x26 + x16);
+{ uint64_t x29 = (x28 >> 0x1b);
+{ uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
+{ uint64_t x31 = (x29 + x15);
+{ uint64_t x32 = (x31 >> 0x1b);
+{ uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
+{ uint64_t x34 = (x32 + x14);
+{ uint64_t x35 = (x34 >> 0x1b);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+{ uint64_t x37 = (x35 + x13);
+{ uint64_t x38 = (x37 >> 0x1b); // carry out of the highest column
+{ uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
+{ uint64_t x40 = (x21 + (0xb * x38)); // that carry re-enters column 0 with weight 0xb
+{ uint32_t x41 = (uint32_t) (x40 >> 0x1c);
+{ uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
+{ uint32_t x43 = (x41 + x24);
+{ uint32_t x44 = (x43 >> 0x1b);
+{ uint32_t x45 = (x43 & 0x7ffffff);
+out[0] = x39; // highest limb
+out[1] = x36;
+out[2] = x33;
+out[3] = x30;
+out[4] = x44 + x27; // last carry is added here without re-masking
+out[5] = x45;
+out[6] = x42; // lowest limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];  (the caller supplies seven uint64_t slots; all seven are written above)
diff --git a/src/Specific/solinas32_2e190m11/fesquare.h b/src/Specific/solinas32_2e190m11/fesquare.h
new file mode 100644
index 000000000..fef33c926
--- /dev/null
+++ b/src/Specific/solinas32_2e190m11/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare (solinas32_2e190m11): takes one field element as seven separate limb arguments and writes its result through out; the definition stores out[0..6] and its trailing comment asks callers for uint64_t out[7].
diff --git a/src/Specific/solinas32_2e190m11/freeze.c b/src/Specific/solinas32_2e190m11/freeze.c
new file mode 100644
index 000000000..b82e016e2
--- /dev/null
+++ b/src/Specific/solinas32_2e190m11/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, declarations used as right-hand sides, a raw "Op Syntax.SubWithGetBorrow" term); it looks like generator output that was never lowered to C, so regenerate it rather than hand-editing.
+out[0] = uint32_t x14; // a declaration appears where a value is expected; x14 is never computed in this file
+out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // un-rendered subtract-with-borrow term; presumably meant to become a 28-bit borrow-propagating subtraction (TODO confirm against the generator)
+out[2] = x2;
+out[3] = 0xffffff5;; // 0xffffff5 == 2^28 - 11; presumably the lowest limb of the modulus (directory name suggests 2^190 - 11)
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e190m11/freeze.h b/src/Specific/solinas32_2e190m11/freeze.h
new file mode 100644
index 000000000..b2c28ccf1
--- /dev/null
+++ b/src/Specific/solinas32_2e190m11/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze (solinas32_2e190m11): seven limb arguments, results written through out. NOTE(review): the freeze.c body added alongside this header is not valid C.
diff --git a/src/Specific/solinas32_2e191m19/femul.c b/src/Specific/solinas32_2e191m19/femul.c
new file mode 100644
index 000000000..c4ef7a3cf
--- /dev/null
+++ b/src/Specific/solinas32_2e191m19/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23) // Multiplies two 10-limb field elements and writes the carried 10-limb product to out[0..9]. First operand: x20, x21, x19, x17, ..., x5; second operand: x38, x39, x37, x35, ..., x23. Limbs are passed most-significant first (x5 and x23 are limb 0: their product is the only unscaled term of x49, where the carry chain starts). The modulus is presumably 2^191 - 19, going by the directory name solinas32_2e191m19 and the 0x13 wrap-around factor.
+{ uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23)))))))))); // x40..x49: one 64-bit accumulator per result limb, x40 for the top limb down to x49 for limb 0. The 0x2 factors presumably account for the mixed limb widths (limb 0 is masked to 20 bits below, all other limbs to 19).
+{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x13 * ((uint64_t)x20 * x38))); // from here on, partial products that land above the top limb are folded back in, scaled by 0x13
+{ uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x13 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+{ uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x13 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+{ uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x13 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x13 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
+{ uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x13 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x13 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x13 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x13 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));
+{ uint32_t x50 = (uint32_t) (x49 >> 0x14); // carry chain starts at limb 0: bits above the low 20 of x49 are carried into the next accumulator
+{ uint32_t x51 = ((uint32_t)x49 & 0xfffff); // limb 0 keeps 20 bits
+{ uint64_t x52 = (x50 + x48);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x13); // every later limb keeps 19 bits (mask 0x7ffff) and carries the rest upward
+{ uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
+{ uint64_t x55 = (x53 + x47);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x13);
+{ uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
+{ uint64_t x58 = (x56 + x46);
+{ uint32_t x59 = (uint32_t) (x58 >> 0x13);
+{ uint32_t x60 = ((uint32_t)x58 & 0x7ffff);
+{ uint64_t x61 = (x59 + x45);
+{ uint32_t x62 = (uint32_t) (x61 >> 0x13);
+{ uint32_t x63 = ((uint32_t)x61 & 0x7ffff);
+{ uint64_t x64 = (x62 + x44);
+{ uint32_t x65 = (uint32_t) (x64 >> 0x13);
+{ uint32_t x66 = ((uint32_t)x64 & 0x7ffff);
+{ uint64_t x67 = (x65 + x43);
+{ uint32_t x68 = (uint32_t) (x67 >> 0x13);
+{ uint32_t x69 = ((uint32_t)x67 & 0x7ffff);
+{ uint64_t x70 = (x68 + x42);
+{ uint32_t x71 = (uint32_t) (x70 >> 0x13);
+{ uint32_t x72 = ((uint32_t)x70 & 0x7ffff);
+{ uint64_t x73 = (x71 + x41);
+{ uint32_t x74 = (uint32_t) (x73 >> 0x13);
+{ uint32_t x75 = ((uint32_t)x73 & 0x7ffff);
+{ uint64_t x76 = (x74 + x40);
+{ uint32_t x77 = (uint32_t) (x76 >> 0x13);
+{ uint32_t x78 = ((uint32_t)x76 & 0x7ffff);
+{ uint32_t x79 = (x51 + (0x13 * x77)); // the carry out of the top limb (x77) wraps around into limb 0, scaled by 0x13
+{ uint32_t x80 = (x79 >> 0x14);
+{ uint32_t x81 = (x79 & 0xfffff);
+{ uint32_t x82 = (x80 + x54); // short second pass: limb 0 carries into limb 1 ...
+{ uint32_t x83 = (x82 >> 0x13); // ... and limb 1 carries into limb 2 (added in out[7] below)
+{ uint32_t x84 = (x82 & 0x7ffff);
+out[0] = x78; // limbs are stored most-significant first: out[0] is the top limb, out[9] is limb 0
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57; // the final carry x83 is added without re-masking, so this limb is not forced back under the 19-bit mask
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10]; (all ten elements out[0]..out[9] are written by femul)
diff --git a/src/Specific/solinas32_2e191m19/femul.h b/src/Specific/solinas32_2e191m19/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas32_2e191m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23); // Prototype for femul (solinas32_2e191m19): two field elements passed as ten limb arguments each (x20..x5, then x38..x23); the definition writes out[0..9] and its trailing comment asks callers for uint64_t out[10].
diff --git a/src/Specific/solinas32_2e191m19/fesquare.c b/src/Specific/solinas32_2e191m19/fesquare.c
new file mode 100644
index 000000000..a26b852d2
--- /dev/null
+++ b/src/Specific/solinas32_2e191m19/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 10-limb field element (x17, x18, x16, x14, ..., x2, most-significant limb first; x2 is limb 0) and writes the carried 10-limb result to out[0..9]. The body is the schoolbook product of the operand with itself; symmetric terms are written out twice rather than merged. The modulus is presumably 2^191 - 19, going by the directory name solinas32_2e191m19 and the 0x13 wrap-around factor.
+{ uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2)))))))))); // x19..x28: one 64-bit accumulator per result limb, x19 for the top limb down to x28 for limb 0. The 0x2 factors presumably account for the mixed limb widths (limb 0 is masked to 20 bits below, all other limbs to 19).
+{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * ((uint64_t)x17 * x17))); // from here on, partial products that land above the top limb are folded back in, scaled by 0x13
+{ uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+{ uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+{ uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
+{ uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));
+{ uint32_t x29 = (uint32_t) (x28 >> 0x14); // carry chain starts at limb 0: bits above the low 20 of x28 are carried into the next accumulator
+{ uint32_t x30 = ((uint32_t)x28 & 0xfffff); // limb 0 keeps 20 bits
+{ uint64_t x31 = (x29 + x27);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x13); // every later limb keeps 19 bits (mask 0x7ffff) and carries the rest upward
+{ uint32_t x33 = ((uint32_t)x31 & 0x7ffff);
+{ uint64_t x34 = (x32 + x26);
+{ uint32_t x35 = (uint32_t) (x34 >> 0x13);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7ffff);
+{ uint64_t x37 = (x35 + x25);
+{ uint32_t x38 = (uint32_t) (x37 >> 0x13);
+{ uint32_t x39 = ((uint32_t)x37 & 0x7ffff);
+{ uint64_t x40 = (x38 + x24);
+{ uint32_t x41 = (uint32_t) (x40 >> 0x13);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7ffff);
+{ uint64_t x43 = (x41 + x23);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x13);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7ffff);
+{ uint64_t x46 = (x44 + x22);
+{ uint32_t x47 = (uint32_t) (x46 >> 0x13);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
+{ uint64_t x49 = (x47 + x21);
+{ uint32_t x50 = (uint32_t) (x49 >> 0x13);
+{ uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
+{ uint64_t x52 = (x50 + x20);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x13);
+{ uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
+{ uint64_t x55 = (x53 + x19);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x13);
+{ uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
+{ uint32_t x58 = (x30 + (0x13 * x56)); // the carry out of the top limb (x56) wraps around into limb 0, scaled by 0x13
+{ uint32_t x59 = (x58 >> 0x14);
+{ uint32_t x60 = (x58 & 0xfffff);
+{ uint32_t x61 = (x59 + x33); // short second pass: limb 0 carries into limb 1 ...
+{ uint32_t x62 = (x61 >> 0x13); // ... and limb 1 carries into limb 2 (added in out[7] below)
+{ uint32_t x63 = (x61 & 0x7ffff);
+out[0] = x57; // limbs are stored most-significant first: out[0] is the top limb, out[9] is limb 0
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36; // the final carry x62 is added without re-masking, so this limb is not forced back under the 19-bit mask
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10]; (all ten elements out[0]..out[9] are written by fesquare)
diff --git a/src/Specific/solinas32_2e191m19/fesquare.h b/src/Specific/solinas32_2e191m19/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas32_2e191m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare (solinas32_2e191m19): one field element passed as ten limb arguments (x17..x2); the definition writes out[0..9] and its trailing comment asks callers for uint64_t out[10].
diff --git a/src/Specific/solinas32_2e191m19/freeze.c b/src/Specific/solinas32_2e191m19/freeze.c
new file mode 100644
index 000000000..d015ecc2b
--- /dev/null
+++ b/src/Specific/solinas32_2e191m19/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, declarations used as right-hand sides, a raw "Op Syntax.SubWithGetBorrow" term); it looks like generator output that was never lowered to C, so regenerate it rather than hand-editing.
+out[0] = uint32_t x20; // a declaration appears where a value is expected; x20 is never computed in this file
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 20 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // un-rendered subtract-with-borrow term; presumably meant to become a 20-bit borrow-propagating subtraction (TODO confirm against the generator)
+out[2] = x2;
+out[3] = 0xfffed;; // 0xfffed == 2^20 - 19; presumably the lowest limb of the modulus (directory name suggests 2^191 - 19)
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e191m19/freeze.h b/src/Specific/solinas32_2e191m19/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas32_2e191m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze (solinas32_2e191m19): ten limb arguments, results written through out. NOTE(review): the freeze.c body added alongside this header is not valid C.
diff --git a/src/Specific/solinas32_2e192m2e64m1/femul.c b/src/Specific/solinas32_2e192m2e64m1/femul.c
new file mode 100644
index 000000000..b05266d7c
--- /dev/null
+++ b/src/Specific/solinas32_2e192m2e64m1/femul.c
@@ -0,0 +1,77 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Multiplies two 8-limb field elements and writes 8 limbs to out[0..7]. First operand: x16, x17, x15, ..., x5; second operand: x30, x31, x29, ..., x19, most-significant limb first (x5 and x19 are limb 0). All limbs are 24 bits wide here: every mask is 0xffffff and every shift is 0x18. The modulus is presumably 2^192 - 2^64 - 1, going by the directory name solinas32_2e192m2e64m1.
+{ ℤ x32 = ((((uint64_t)x5 * x30) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + (((uint64_t)x17 * x21) + ((uint64_t)x16 * x19)))))))) +ℤ ((0x10000 *ℤ (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))) +ℤ (0x10000000000 *ℤ ((uint64_t)x16 * x30)))); // NOTE(review): "ℤ", "+ℤ", "*ℤ" and ">>ℤ" are not C; presumably the generator's notation for values it could not fit into a machine word, so this file does not compile as-is. x32..x39 are the accumulators for limb 7 down to limb 0.
+{ ℤ x33 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x17 * x19))))))) +ℤ (((uint64_t)x16 * x30) +ℤ (0x10000 *ℤ (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29)))))); // partial products above the top limb are folded back twice: once unscaled and once scaled by 0x10000 (presumably the "1" and "2^64" terms of the modulus)
+{ ℤ x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) +ℤ ((((uint64_t)x17 * x30) + ((uint64_t)x16 * x31)) +ℤ (0x10000 *ℤ (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))))));
+{ ℤ x35 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + ((uint64_t)x13 * x19))))) +ℤ ((((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29))) +ℤ (0x10000 *ℤ (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25))))))));
+{ ℤ x36 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) +ℤ ((((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))) +ℤ (0x10000 *ℤ (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23)))))))));
+{ ℤ x37 = ((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))) +ℤ ((((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25))))) +ℤ (0x10000 *ℤ (((uint64_t)x7 * x30) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x16 * x21))))))))));
+{ uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23)))))));
+{ uint64_t x39 = (((uint64_t)x5 * x19) + (((uint64_t)x7 * x30) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x16 * x21))))))));
+{ uint32_t x40 = (uint32_t) (x38 >> 0x18); // carry/reduction phase: each accumulator is split into a 24-bit limb and a carry
+{ uint32_t x41 = ((uint32_t)x38 & 0xffffff);
+{ ℤ x42 = (x32 >>ℤ 0x18);
+{ uint32_t x43 = (x32 & 0xffffff);
+{ ℤ x44 = ((0x1000000 *ℤ x42) +ℤ x43); // x44 reassembles x32 from its two halves x42/x43 and is then split again into x45/x46
+{ ℤ x45 = (x44 >>ℤ 0x18);
+{ uint32_t x46 = (x44 & 0xffffff);
+{ ℤ x47 = ((x40 +ℤ x37) +ℤ (0x10000 *ℤ x45)); // x45, the overflow of the top accumulator, is folded back scaled by 0x10000 here and unscaled into x50 below
+{ uint64_t x48 = (x47 >> 0x18);
+{ uint32_t x49 = (x47 & 0xffffff);
+{ ℤ x50 = (x39 +ℤ x45);
+{ uint64_t x51 = (x50 >> 0x18);
+{ uint32_t x52 = (x50 & 0xffffff);
+{ ℤ x53 = (x48 +ℤ x36);
+{ uint64_t x54 = (x53 >> 0x18);
+{ uint32_t x55 = (x53 & 0xffffff);
+{ uint64_t x56 = (x51 + x41);
+{ uint32_t x57 = (uint32_t) (x56 >> 0x18);
+{ uint32_t x58 = ((uint32_t)x56 & 0xffffff);
+{ ℤ x59 = (x54 +ℤ x35);
+{ uint64_t x60 = (x59 >> 0x18);
+{ uint32_t x61 = (x59 & 0xffffff);
+{ ℤ x62 = (x60 +ℤ x34);
+{ uint64_t x63 = (x62 >> 0x18);
+{ uint32_t x64 = (x62 & 0xffffff);
+{ ℤ x65 = (x63 +ℤ x33);
+{ uint64_t x66 = (x65 >> 0x18);
+{ uint32_t x67 = (x65 & 0xffffff);
+{ uint64_t x68 = (x66 + x46);
+{ uint32_t x69 = (uint32_t) (x68 >> 0x18);
+{ uint32_t x70 = ((uint32_t)x68 & 0xffffff);
+{ uint64_t x71 = (((uint64_t)0x1000000 * x69) + x70); // x71 reassembles x68 from x69/x70 and is then split again into x72/x73
+{ uint32_t x72 = (uint32_t) (x71 >> 0x18);
+{ uint32_t x73 = ((uint32_t)x71 & 0xffffff);
+{ uint64_t x74 = ((x57 + x49) + ((uint64_t)0x10000 * x72)); // second fold of the top-limb overflow (x72): scaled by 0x10000 here, unscaled into x77 below
+{ uint32_t x75 = (uint32_t) (x74 >> 0x18);
+{ uint32_t x76 = ((uint32_t)x74 & 0xffffff);
+{ uint32_t x77 = (x52 + x72);
+{ uint32_t x78 = (x77 >> 0x18);
+{ uint32_t x79 = (x77 & 0xffffff);
+out[0] = x73; // limbs are stored most-significant first: out[0] is the top limb, out[7] is limb 0
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x75 + x55; // carry x75 is added without re-masking
+out[5] = x76;
+out[6] = x78 + x58; // carry x78 is added without re-masking
+out[7] = x79;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (all eight elements out[0]..out[7] are written by femul)
diff --git a/src/Specific/solinas32_2e192m2e64m1/femul.h b/src/Specific/solinas32_2e192m2e64m1/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas32_2e192m2e64m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for femul (solinas32_2e192m2e64m1): two field elements passed as eight limb arguments each (x16..x5, then x30..x19); the definition writes out[0..7] and its trailing comment asks callers for uint64_t out[8].
diff --git a/src/Specific/solinas32_2e192m2e64m1/fesquare.c b/src/Specific/solinas32_2e192m2e64m1/fesquare.c
new file mode 100644
index 000000000..9beed8725
--- /dev/null
+++ b/src/Specific/solinas32_2e192m2e64m1/fesquare.c
@@ -0,0 +1,77 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 8-limb field element (x13, x14, x12, x10, ..., x2, most-significant limb first; x2 is limb 0) and writes 8 limbs to out[0..7]. All limbs are 24 bits wide here: every mask is 0xffffff and every shift is 0x18. Symmetric partial products are written out twice rather than merged. The modulus is presumably 2^192 - 2^64 - 1, going by the directory name solinas32_2e192m2e64m1.
+{ ℤ x15 = ((((uint64_t)x2 * x13) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x13 * x2)))))))) +ℤ ((0x10000 *ℤ (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))) +ℤ (0x10000000000 *ℤ ((uint64_t)x13 * x13)))); // NOTE(review): "ℤ", "+ℤ", "*ℤ" and ">>ℤ" are not C; presumably the generator's notation for values it could not fit into a machine word, so this file does not compile as-is. x15..x22 are the accumulators for limb 7 down to limb 0.
+{ ℤ x16 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) +ℤ (((uint64_t)x13 * x13) +ℤ (0x10000 *ℤ (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12)))))); // partial products above the top limb are folded back twice: once unscaled and once scaled by 0x10000 (presumably the "1" and "2^64" terms of the modulus)
+{ ℤ x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ ((((uint64_t)x14 * x13) + ((uint64_t)x13 * x14)) +ℤ (0x10000 *ℤ (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))))));
+{ ℤ x18 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ ((((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12))) +ℤ (0x10000 *ℤ (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8))))))));
+{ ℤ x19 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ ((((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))) +ℤ (0x10000 *ℤ (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6)))))))));
+{ ℤ x20 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ ((((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8))))) +ℤ (0x10000 *ℤ (((uint64_t)x4 * x13) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x13 * x4))))))))));
+{ uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6)))))));
+{ uint64_t x22 = (((uint64_t)x2 * x2) + (((uint64_t)x4 * x13) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x13 * x4))))))));
+{ uint32_t x23 = (uint32_t) (x21 >> 0x18); // carry/reduction phase: each accumulator is split into a 24-bit limb and a carry
+{ uint32_t x24 = ((uint32_t)x21 & 0xffffff);
+{ ℤ x25 = (x15 >>ℤ 0x18);
+{ uint32_t x26 = (x15 & 0xffffff);
+{ ℤ x27 = ((0x1000000 *ℤ x25) +ℤ x26); // x27 reassembles x15 from its two halves x25/x26 and is then split again into x28/x29
+{ ℤ x28 = (x27 >>ℤ 0x18);
+{ uint32_t x29 = (x27 & 0xffffff);
+{ ℤ x30 = ((x23 +ℤ x20) +ℤ (0x10000 *ℤ x28)); // x28, the overflow of the top accumulator, is folded back scaled by 0x10000 here and unscaled into x33 below
+{ uint64_t x31 = (x30 >> 0x18);
+{ uint32_t x32 = (x30 & 0xffffff);
+{ ℤ x33 = (x22 +ℤ x28);
+{ uint64_t x34 = (x33 >> 0x18);
+{ uint32_t x35 = (x33 & 0xffffff);
+{ ℤ x36 = (x31 +ℤ x19);
+{ uint64_t x37 = (x36 >> 0x18);
+{ uint32_t x38 = (x36 & 0xffffff);
+{ uint64_t x39 = (x34 + x24);
+{ uint32_t x40 = (uint32_t) (x39 >> 0x18);
+{ uint32_t x41 = ((uint32_t)x39 & 0xffffff);
+{ ℤ x42 = (x37 +ℤ x18);
+{ uint64_t x43 = (x42 >> 0x18);
+{ uint32_t x44 = (x42 & 0xffffff);
+{ ℤ x45 = (x43 +ℤ x17);
+{ uint64_t x46 = (x45 >> 0x18);
+{ uint32_t x47 = (x45 & 0xffffff);
+{ ℤ x48 = (x46 +ℤ x16);
+{ uint64_t x49 = (x48 >> 0x18);
+{ uint32_t x50 = (x48 & 0xffffff);
+{ uint64_t x51 = (x49 + x29);
+{ uint32_t x52 = (uint32_t) (x51 >> 0x18);
+{ uint32_t x53 = ((uint32_t)x51 & 0xffffff);
+{ uint64_t x54 = (((uint64_t)0x1000000 * x52) + x53); // x54 reassembles x51 from x52/x53 and is then split again into x55/x56
+{ uint32_t x55 = (uint32_t) (x54 >> 0x18);
+{ uint32_t x56 = ((uint32_t)x54 & 0xffffff);
+{ uint64_t x57 = ((x40 + x32) + ((uint64_t)0x10000 * x55)); // second fold of the top-limb overflow (x55): scaled by 0x10000 here, unscaled into x60 below
+{ uint32_t x58 = (uint32_t) (x57 >> 0x18);
+{ uint32_t x59 = ((uint32_t)x57 & 0xffffff);
+{ uint32_t x60 = (x35 + x55);
+{ uint32_t x61 = (x60 >> 0x18);
+{ uint32_t x62 = (x60 & 0xffffff);
+out[0] = x56; // limbs are stored most-significant first: out[0] is the top limb, out[7] is limb 0
+out[1] = x50;
+out[2] = x47;
+out[3] = x44;
+out[4] = x58 + x38; // carry x58 is added without re-masking
+out[5] = x59;
+out[6] = x61 + x41; // carry x61 is added without re-masking
+out[7] = x62;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (all eight elements out[0]..out[7] are written by fesquare)
diff --git a/src/Specific/solinas32_2e192m2e64m1/fesquare.h b/src/Specific/solinas32_2e192m2e64m1/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas32_2e192m2e64m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare (solinas32_2e192m2e64m1): one field element passed as eight limb arguments (x13..x2); the definition writes out[0..7] and its trailing comment asks callers for uint64_t out[8].
diff --git a/src/Specific/solinas32_2e192m2e64m1/freeze.c b/src/Specific/solinas32_2e192m2e64m1/freeze.c
new file mode 100644
index 000000000..ac3559f88
--- /dev/null
+++ b/src/Specific/solinas32_2e192m2e64m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, declarations used as right-hand sides, a raw "Op Syntax.SubWithGetBorrow" term); it looks like generator output that was never lowered to C, so regenerate it rather than hand-editing.
+out[0] = uint32_t x16; // a declaration appears where a value is expected; x16 is never computed in this file
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // un-rendered subtract-with-borrow term; presumably meant to become a 24-bit borrow-propagating subtraction (TODO confirm against the generator)
+out[2] = x2;
+out[3] = 0xffffff;; // 0xffffff == 2^24 - 1; presumably the lowest limb of the modulus (directory name suggests 2^192 - 2^64 - 1)
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e192m2e64m1/freeze.h b/src/Specific/solinas32_2e192m2e64m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas32_2e192m2e64m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze (solinas32_2e192m2e64m1): eight limb arguments, results written through out. NOTE(review): the freeze.c body added alongside this header is not valid C.
diff --git a/src/Specific/solinas32_2e194m33/femul.c b/src/Specific/solinas32_2e194m33/femul.c
new file mode 100644
index 000000000..c12303cb3
--- /dev/null
+++ b/src/Specific/solinas32_2e194m33/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: forms the product of the 8-limb operands (x16, x17, x15, x13, x11, x9, x7, x5) and (x30, x31, x29, x27, x25, x23, x21, x19) and writes eight carried limbs to out[0..7]. The factor 0x21 (= 33) and the 24/25-bit limb masks presumably encode reduction modulo 2^194 - 33, as the directory name suggests - TODO confirm.
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
+{ uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19)))))))); // x32..x39: 64-bit column sums of limb cross-products; x32 is the top column, x39 the bottom one
+{ uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x21 * ((uint64_t)x16 * x30))); // second group of terms is scaled by 0x21; the 0x2 factors presumably compensate for the mixed 24/25-bit limb widths - TODO confirm
+{ uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x21 * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
+{ uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0x21 * ((0x2 * ((uint64_t)x15 * x30)) + ((0x2 * ((uint64_t)x17 * x31)) + (0x2 * ((uint64_t)x16 * x29))))));
+{ uint64_t x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) + (0x21 * (((uint64_t)x13 * x30) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x16 * x27))))));
+{ uint64_t x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) + (0x21 * (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
+{ uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0x21 * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
+{ uint64_t x39 = (((uint64_t)x5 * x19) + (0x21 * ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21))))))))));
+{ uint64_t x40 = (x39 >> 0x19); // carry chain: each column is split into a carry (high bits) and a 24- or 25-bit limb (low bits); the carry is added to the next column up
+{ uint32_t x41 = ((uint32_t)x39 & 0x1ffffff);
+{ uint64_t x42 = (x40 + x38);
+{ uint64_t x43 = (x42 >> 0x18);
+{ uint32_t x44 = ((uint32_t)x42 & 0xffffff);
+{ uint64_t x45 = (x43 + x37);
+{ uint64_t x46 = (x45 >> 0x18);
+{ uint32_t x47 = ((uint32_t)x45 & 0xffffff);
+{ uint64_t x48 = (x46 + x36);
+{ uint64_t x49 = (x48 >> 0x18);
+{ uint32_t x50 = ((uint32_t)x48 & 0xffffff);
+{ uint64_t x51 = (x49 + x35);
+{ uint64_t x52 = (x51 >> 0x19);
+{ uint32_t x53 = ((uint32_t)x51 & 0x1ffffff);
+{ uint64_t x54 = (x52 + x34);
+{ uint64_t x55 = (x54 >> 0x18);
+{ uint32_t x56 = ((uint32_t)x54 & 0xffffff);
+{ uint64_t x57 = (x55 + x33);
+{ uint64_t x58 = (x57 >> 0x18);
+{ uint32_t x59 = ((uint32_t)x57 & 0xffffff);
+{ uint64_t x60 = (x58 + x32);
+{ uint32_t x61 = (uint32_t) (x60 >> 0x18);
+{ uint32_t x62 = ((uint32_t)x60 & 0xffffff);
+{ uint64_t x63 = (x41 + ((uint64_t)0x21 * x61)); // the carry out of the top column wraps around into the lowest limb, scaled by 0x21
+{ uint32_t x64 = (uint32_t) (x63 >> 0x19);
+{ uint32_t x65 = ((uint32_t)x63 & 0x1ffffff);
+{ uint32_t x66 = (x64 + x44); // second, short carry pass through the two lowest limbs
+{ uint32_t x67 = (x66 >> 0x18);
+{ uint32_t x68 = (x66 & 0xffffff);
+out[0] = x62; // limbs are stored from the top column (out[0]) down to the lowest (out[7])
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // last carry is added without a further mask
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e194m33/femul.h b/src/Specific/solinas32_2e194m33/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas32_2e194m33/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for femul(): `out` is the only pointer parameter; the sixteen uint64_t operands are passed by value. force_inline is the always_inline attribute macro defined just above.
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/solinas32_2e194m33/fesquare.c b/src/Specific/solinas32_2e194m33/fesquare.c
new file mode 100644
index 000000000..0a48a9893
--- /dev/null
+++ b/src/Specific/solinas32_2e194m33/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 8-limb operand (x13, x14, x12, x10, x8, x6, x4, x2) and writes eight carried limbs to out[0..7]. The factor 0x21 (= 33) and the 24/25-bit limb masks presumably encode reduction modulo 2^194 - 33, as the directory name suggests - TODO confirm.
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2)))))))); // x15..x22: 64-bit column sums of limb cross-products; x15 is the top column, x22 the bottom one
+{ uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x21 * ((uint64_t)x13 * x13))); // second group of terms is scaled by 0x21; the 0x2 factors presumably compensate for the mixed 24/25-bit limb widths - TODO confirm
+{ uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x21 * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
+{ uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x21 * ((0x2 * ((uint64_t)x12 * x13)) + ((0x2 * ((uint64_t)x14 * x14)) + (0x2 * ((uint64_t)x13 * x12))))));
+{ uint64_t x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x21 * (((uint64_t)x10 * x13) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((uint64_t)x13 * x10))))));
+{ uint64_t x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x21 * (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
+{ uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x21 * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
+{ uint64_t x22 = (((uint64_t)x2 * x2) + (0x21 * ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4))))))))));
+{ uint64_t x23 = (x22 >> 0x19); // carry chain: each column is split into a carry (high bits) and a 24- or 25-bit limb (low bits); the carry is added to the next column up
+{ uint32_t x24 = ((uint32_t)x22 & 0x1ffffff);
+{ uint64_t x25 = (x23 + x21);
+{ uint64_t x26 = (x25 >> 0x18);
+{ uint32_t x27 = ((uint32_t)x25 & 0xffffff);
+{ uint64_t x28 = (x26 + x20);
+{ uint64_t x29 = (x28 >> 0x18);
+{ uint32_t x30 = ((uint32_t)x28 & 0xffffff);
+{ uint64_t x31 = (x29 + x19);
+{ uint64_t x32 = (x31 >> 0x18);
+{ uint32_t x33 = ((uint32_t)x31 & 0xffffff);
+{ uint64_t x34 = (x32 + x18);
+{ uint64_t x35 = (x34 >> 0x19);
+{ uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
+{ uint64_t x37 = (x35 + x17);
+{ uint64_t x38 = (x37 >> 0x18);
+{ uint32_t x39 = ((uint32_t)x37 & 0xffffff);
+{ uint64_t x40 = (x38 + x16);
+{ uint64_t x41 = (x40 >> 0x18);
+{ uint32_t x42 = ((uint32_t)x40 & 0xffffff);
+{ uint64_t x43 = (x41 + x15);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x18);
+{ uint32_t x45 = ((uint32_t)x43 & 0xffffff);
+{ uint64_t x46 = (x24 + ((uint64_t)0x21 * x44)); // the carry out of the top column wraps around into the lowest limb, scaled by 0x21
+{ uint32_t x47 = (uint32_t) (x46 >> 0x19);
+{ uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
+{ uint32_t x49 = (x47 + x27); // second, short carry pass through the two lowest limbs
+{ uint32_t x50 = (x49 >> 0x18);
+{ uint32_t x51 = (x49 & 0xffffff);
+out[0] = x45; // limbs are stored from the top column (out[0]) down to the lowest (out[7])
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // last carry is added without a further mask
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e194m33/fesquare.h b/src/Specific/solinas32_2e194m33/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas32_2e194m33/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for fesquare(): `out` is the only pointer parameter; the eight uint64_t operands are passed by value. force_inline is the always_inline attribute macro defined just above.
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e194m33/freeze.c b/src/Specific/solinas32_2e194m33/freeze.c
new file mode 100644
index 000000000..9a73df9dc
--- /dev/null
+++ b/src/Specific/solinas32_2e194m33/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): the body of freeze() below is not compilable C - there is no opening brace, out[0]/out[1] are assigned type-prefixed names, and out[1] contains the raw term "Op Syntax.SubWithGetBorrow ...". It looks like unprocessed code-generator output; regenerate instead of hand-editing - TODO confirm.
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x16;
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 25 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x1ffffdf;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e194m33/freeze.h b/src/Specific/solinas32_2e194m33/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas32_2e194m33/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for freeze(): `out` is the only pointer parameter; the eight uint64_t operands are passed by value. force_inline is the always_inline attribute macro defined just above.
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e196m15/femul.c b/src/Specific/solinas32_2e196m15/femul.c
new file mode 100644
index 000000000..a3ff58741
--- /dev/null
+++ b/src/Specific/solinas32_2e196m15/femul.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: forms the product of the 7-limb operands (x14, x15, x13, x11, x9, x7, x5) and (x26, x27, x25, x23, x21, x19, x17) and writes seven carried 28-bit limbs to out[0..6]. The factor 0xf (= 15) presumably encodes reduction modulo 2^196 - 15, as the directory name suggests - TODO confirm. Values the generator printed with the non-C type "ℤ" are held in uint128_t (typedef'd earlier in this file).
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)
+{ uint64_t x28 = (((uint64_t)x5 * x26) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + (((uint64_t)x15 * x19) + ((uint64_t)x14 * x17))))))); // x28..x34: column sums of limb cross-products; x28 is the top column, x34 the bottom one
+{ uint64_t x29 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + (((uint64_t)x11 * x21) + (((uint64_t)x13 * x19) + ((uint64_t)x15 * x17)))))) + (0xf * ((uint64_t)x14 * x26)));
+{ uint128_t x30 = ((uint128_t)(((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + (((uint64_t)x11 * x19) + ((uint64_t)x13 * x17))))) + ((uint128_t)0xf * (((uint64_t)x15 * x26) + ((uint64_t)x14 * x27)))); // widened: the generator could not bound this sum by 64 bits, so the scaled term and the final addition are done in 128 bits
+{ uint128_t x31 = ((uint128_t)(((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + (((uint64_t)x9 * x19) + ((uint64_t)x11 * x17)))) + ((uint128_t)0xf * (((uint64_t)x13 * x26) + (((uint64_t)x15 * x27) + ((uint64_t)x14 * x25)))));
+{ uint128_t x32 = ((uint128_t)(((uint64_t)x5 * x21) + (((uint64_t)x7 * x19) + ((uint64_t)x9 * x17))) + ((uint128_t)0xf * (((uint64_t)x11 * x26) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x14 * x23))))));
+{ uint128_t x33 = ((uint128_t)(((uint64_t)x5 * x19) + ((uint64_t)x7 * x17)) + ((uint128_t)0xf * (((uint64_t)x9 * x26) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x14 * x21)))))));
+{ uint128_t x34 = ((uint128_t)((uint64_t)x5 * x17) + ((uint128_t)0xf * (((uint64_t)x7 * x26) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + (((uint64_t)x15 * x21) + ((uint64_t)x14 * x19))))))));
+{ uint64_t x35 = (uint64_t) (x34 >> 0x1c); // carry chain: each column is split into a carry (high bits) and a 28-bit limb (low bits); the carry is added to the next column up
+{ uint32_t x36 = ((uint32_t)x34 & 0xfffffff);
+{ uint128_t x37 = (x35 + x33);
+{ uint64_t x38 = (uint64_t) (x37 >> 0x1c);
+{ uint32_t x39 = ((uint32_t)x37 & 0xfffffff);
+{ uint128_t x40 = (x38 + x32);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x1c);
+{ uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
+{ uint128_t x43 = (x41 + x31);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x1c);
+{ uint32_t x45 = ((uint32_t)x43 & 0xfffffff);
+{ uint128_t x46 = (x44 + x30);
+{ uint64_t x47 = (uint64_t) (x46 >> 0x1c);
+{ uint32_t x48 = ((uint32_t)x46 & 0xfffffff);
+{ uint64_t x49 = (x47 + x29);
+{ uint64_t x50 = (x49 >> 0x1c);
+{ uint32_t x51 = ((uint32_t)x49 & 0xfffffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint64_t x53 = (x52 >> 0x1c);
+{ uint32_t x54 = ((uint32_t)x52 & 0xfffffff);
+{ uint64_t x55 = (x36 + (0xf * x53)); // the carry out of the top column wraps around into the lowest limb, scaled by 0xf
+{ uint32_t x56 = (uint32_t) (x55 >> 0x1c);
+{ uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
+{ uint32_t x58 = (x56 + x39); // second, short carry pass through the two lowest limbs
+{ uint32_t x59 = (x58 >> 0x1c);
+{ uint32_t x60 = (x58 & 0xfffffff);
+out[0] = x54; // limbs are stored from the top column (out[0]) down to the lowest (out[6])
+out[1] = x51;
+out[2] = x48;
+out[3] = x45;
+out[4] = x59 + x42; // last carry is added without a further mask
+out[5] = x60;
+out[6] = x57;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/solinas32_2e196m15/femul.h b/src/Specific/solinas32_2e196m15/femul.h
new file mode 100644
index 000000000..ad4e84953
--- /dev/null
+++ b/src/Specific/solinas32_2e196m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for femul(): `out` is the only pointer parameter; the fourteen uint64_t operands are passed by value. force_inline is the always_inline attribute macro defined just above.
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17);
diff --git a/src/Specific/solinas32_2e196m15/fesquare.c b/src/Specific/solinas32_2e196m15/fesquare.c
new file mode 100644
index 000000000..eb632a21d
--- /dev/null
+++ b/src/Specific/solinas32_2e196m15/fesquare.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 7-limb operand (x11, x12, x10, x8, x6, x4, x2) and writes seven carried 28-bit limbs to out[0..6]. The factor 0xf (= 15) presumably encodes reduction modulo 2^196 - 15, as the directory name suggests - TODO confirm. Values the generator printed with the non-C type "ℤ" are held in uint128_t (typedef'd earlier in this file).
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x13 = (((uint64_t)x2 * x11) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x11 * x2))))))); // x13..x19: column sums of limb cross-products; x13 is the top column, x19 the bottom one
+{ uint64_t x14 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0xf * ((uint64_t)x11 * x11)));
+{ uint128_t x15 = ((uint128_t)(((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + ((uint128_t)0xf * (((uint64_t)x12 * x11) + ((uint64_t)x11 * x12)))); // widened: the generator could not bound this sum by 64 bits, so the scaled term and the final addition are done in 128 bits
+{ uint128_t x16 = ((uint128_t)(((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + ((uint128_t)0xf * (((uint64_t)x10 * x11) + (((uint64_t)x12 * x12) + ((uint64_t)x11 * x10)))));
+{ uint128_t x17 = ((uint128_t)(((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + ((uint128_t)0xf * (((uint64_t)x8 * x11) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x11 * x8))))));
+{ uint128_t x18 = ((uint128_t)(((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + ((uint128_t)0xf * (((uint64_t)x6 * x11) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x11 * x6)))))));
+{ uint128_t x19 = ((uint128_t)((uint64_t)x2 * x2) + ((uint128_t)0xf * (((uint64_t)x4 * x11) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x11 * x4))))))));
+{ uint64_t x20 = (uint64_t) (x19 >> 0x1c); // carry chain: each column is split into a carry (high bits) and a 28-bit limb (low bits); the carry is added to the next column up
+{ uint32_t x21 = ((uint32_t)x19 & 0xfffffff);
+{ uint128_t x22 = (x20 + x18);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x1c);
+{ uint32_t x24 = ((uint32_t)x22 & 0xfffffff);
+{ uint128_t x25 = (x23 + x17);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x1c);
+{ uint32_t x27 = ((uint32_t)x25 & 0xfffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x1c);
+{ uint32_t x30 = ((uint32_t)x28 & 0xfffffff);
+{ uint128_t x31 = (x29 + x15);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x1c);
+{ uint32_t x33 = ((uint32_t)x31 & 0xfffffff);
+{ uint64_t x34 = (x32 + x14);
+{ uint64_t x35 = (x34 >> 0x1c);
+{ uint32_t x36 = ((uint32_t)x34 & 0xfffffff);
+{ uint64_t x37 = (x35 + x13);
+{ uint64_t x38 = (x37 >> 0x1c);
+{ uint32_t x39 = ((uint32_t)x37 & 0xfffffff);
+{ uint64_t x40 = (x21 + (0xf * x38)); // the carry out of the top column wraps around into the lowest limb, scaled by 0xf
+{ uint32_t x41 = (uint32_t) (x40 >> 0x1c);
+{ uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
+{ uint32_t x43 = (x41 + x24); // second, short carry pass through the two lowest limbs
+{ uint32_t x44 = (x43 >> 0x1c);
+{ uint32_t x45 = (x43 & 0xfffffff);
+out[0] = x39; // limbs are stored from the top column (out[0]) down to the lowest (out[6])
+out[1] = x36;
+out[2] = x33;
+out[3] = x30;
+out[4] = x44 + x27; // last carry is added without a further mask
+out[5] = x45;
+out[6] = x42;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7];
diff --git a/src/Specific/solinas32_2e196m15/fesquare.h b/src/Specific/solinas32_2e196m15/fesquare.h
new file mode 100644
index 000000000..fef33c926
--- /dev/null
+++ b/src/Specific/solinas32_2e196m15/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for fesquare(): `out` is the only pointer parameter; the seven uint64_t operands are passed by value. force_inline is the always_inline attribute macro defined just above.
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e196m15/freeze.c b/src/Specific/solinas32_2e196m15/freeze.c
new file mode 100644
index 000000000..ebaee8ab2
--- /dev/null
+++ b/src/Specific/solinas32_2e196m15/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): the body of freeze() below is not compilable C - there is no opening brace, out[0]/out[1] are assigned type-prefixed names, and out[1] contains the raw term "Op Syntax.SubWithGetBorrow ...". It looks like unprocessed code-generator output; regenerate instead of hand-editing - TODO confirm.
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x14;
+out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffff1;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e196m15/freeze.h b/src/Specific/solinas32_2e196m15/freeze.h
new file mode 100644
index 000000000..b2c28ccf1
--- /dev/null
+++ b/src/Specific/solinas32_2e196m15/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for freeze(): `out` is the only pointer parameter; the seven uint64_t operands are passed by value. force_inline is the always_inline attribute macro defined just above.
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e198m17/femul.c b/src/Specific/solinas32_2e198m17/femul.c
new file mode 100644
index 000000000..3de6c467f
--- /dev/null
+++ b/src/Specific/solinas32_2e198m17/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: forms the product of the 9-limb operands (x18, x19, x17, x15, x13, x11, x9, x7, x5) and (x34, x35, x33, x31, x29, x27, x25, x23, x21) and writes nine carried 22-bit limbs to out[0..8]. The factor 0x11 (= 17) presumably encodes reduction modulo 2^198 - 17, as the directory name suggests - TODO confirm.
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21)
+{ uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21))))))))); // x36..x44: 64-bit column sums of limb cross-products; x36 is the top column, x44 the bottom one
+{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x11 * ((uint64_t)x18 * x34))); // second group of terms is scaled by 0x11
+{ uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x11 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
+{ uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x11 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
+{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x11 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+{ uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x11 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
+{ uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x11 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
+{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x11 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x11 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23))))))))));
+{ uint64_t x45 = (x44 >> 0x16); // carry chain: each column is split into a carry (high bits) and a 22-bit limb (low bits); the carry is added to the next column up
+{ uint32_t x46 = ((uint32_t)x44 & 0x3fffff);
+{ uint64_t x47 = (x45 + x43);
+{ uint64_t x48 = (x47 >> 0x16);
+{ uint32_t x49 = ((uint32_t)x47 & 0x3fffff);
+{ uint64_t x50 = (x48 + x42);
+{ uint64_t x51 = (x50 >> 0x16);
+{ uint32_t x52 = ((uint32_t)x50 & 0x3fffff);
+{ uint64_t x53 = (x51 + x41);
+{ uint32_t x54 = (uint32_t) (x53 >> 0x16);
+{ uint32_t x55 = ((uint32_t)x53 & 0x3fffff);
+{ uint64_t x56 = (x54 + x40);
+{ uint32_t x57 = (uint32_t) (x56 >> 0x16);
+{ uint32_t x58 = ((uint32_t)x56 & 0x3fffff);
+{ uint64_t x59 = (x57 + x39);
+{ uint32_t x60 = (uint32_t) (x59 >> 0x16);
+{ uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
+{ uint64_t x62 = (x60 + x38);
+{ uint32_t x63 = (uint32_t) (x62 >> 0x16);
+{ uint32_t x64 = ((uint32_t)x62 & 0x3fffff);
+{ uint64_t x65 = (x63 + x37);
+{ uint32_t x66 = (uint32_t) (x65 >> 0x16);
+{ uint32_t x67 = ((uint32_t)x65 & 0x3fffff);
+{ uint64_t x68 = (x66 + x36);
+{ uint32_t x69 = (uint32_t) (x68 >> 0x16);
+{ uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
+{ uint64_t x71 = (x46 + ((uint64_t)0x11 * x69)); // the carry out of the top column wraps around into the lowest limb, scaled by 0x11
+{ uint32_t x72 = (uint32_t) (x71 >> 0x16);
+{ uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
+{ uint32_t x74 = (x72 + x49); // second, short carry pass through the two lowest limbs
+{ uint32_t x75 = (x74 >> 0x16);
+{ uint32_t x76 = (x74 & 0x3fffff);
+out[0] = x70; // limbs are stored from the top column (out[0]) down to the lowest (out[8])
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52; // last carry is added without a further mask
+out[7] = x76;
+out[8] = x73;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e198m17/femul.h b/src/Specific/solinas32_2e198m17/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas32_2e198m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for femul(): `out` is the only pointer parameter; the eighteen uint64_t operands are passed by value. force_inline is the always_inline attribute macro defined just above.
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21);
diff --git a/src/Specific/solinas32_2e198m17/fesquare.c b/src/Specific/solinas32_2e198m17/fesquare.c
new file mode 100644
index 000000000..3685d9d70
--- /dev/null
+++ b/src/Specific/solinas32_2e198m17/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares the 9-limb operand (x15, x16, x14, x12, x10, x8, x6, x4, x2) and writes nine carried 22-bit limbs to out[0..8]. The factor 0x11 (= 17) presumably encodes reduction modulo 2^198 - 17, as the directory name suggests - TODO confirm.
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2))))))))); // x17..x25: 64-bit column sums of limb cross-products; x17 is the top column, x25 the bottom one
+{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * ((uint64_t)x15 * x15))); // second group of terms is scaled by 0x11
+{ uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x11 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
+{ uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
+{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+{ uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x11 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
+{ uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x11 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4))))))))));
+{ uint64_t x26 = (x25 >> 0x16); // carry chain: each column is split into a carry (high bits) and a 22-bit limb (low bits); the carry is added to the next column up
+{ uint32_t x27 = ((uint32_t)x25 & 0x3fffff);
+{ uint64_t x28 = (x26 + x24);
+{ uint64_t x29 = (x28 >> 0x16);
+{ uint32_t x30 = ((uint32_t)x28 & 0x3fffff);
+{ uint64_t x31 = (x29 + x23);
+{ uint64_t x32 = (x31 >> 0x16);
+{ uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
+{ uint64_t x34 = (x32 + x22);
+{ uint32_t x35 = (uint32_t) (x34 >> 0x16);
+{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+{ uint64_t x37 = (x35 + x21);
+{ uint32_t x38 = (uint32_t) (x37 >> 0x16);
+{ uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
+{ uint64_t x40 = (x38 + x20);
+{ uint32_t x41 = (uint32_t) (x40 >> 0x16);
+{ uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
+{ uint64_t x43 = (x41 + x19);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x16);
+{ uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
+{ uint64_t x46 = (x44 + x18);
+{ uint32_t x47 = (uint32_t) (x46 >> 0x16);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+{ uint64_t x49 = (x47 + x17);
+{ uint32_t x50 = (uint32_t) (x49 >> 0x16);
+{ uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
+{ uint64_t x52 = (x27 + ((uint64_t)0x11 * x50)); // the carry out of the top column wraps around into the lowest limb, scaled by 0x11
+{ uint32_t x53 = (uint32_t) (x52 >> 0x16);
+{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+{ uint32_t x55 = (x53 + x30); // second, short carry pass through the two lowest limbs
+{ uint32_t x56 = (x55 >> 0x16);
+{ uint32_t x57 = (x55 & 0x3fffff);
+out[0] = x51; // limbs are stored from the top column (out[0]) down to the lowest (out[8])
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33; // last carry is added without a further mask
+out[7] = x57;
+out[8] = x54;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e198m17/fesquare.h b/src/Specific/solinas32_2e198m17/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas32_2e198m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for fesquare(): `out` is the only pointer parameter; the nine uint64_t operands are passed by value. force_inline is the always_inline attribute macro defined just above.
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e198m17/freeze.c b/src/Specific/solinas32_2e198m17/freeze.c
new file mode 100644
index 000000000..256e5606e
--- /dev/null
+++ b/src/Specific/solinas32_2e198m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): the body of freeze() below is not compilable C - there is no opening brace, out[0]/out[1] are assigned type-prefixed names, and out[1] contains the raw term "Op Syntax.SubWithGetBorrow ...". It looks like unprocessed code-generator output; regenerate instead of hand-editing - TODO confirm.
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x18;
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x3fffef;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e198m17/freeze.h b/src/Specific/solinas32_2e198m17/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas32_2e198m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: an output pointer followed by nine scalar uint64_t inputs.
diff --git a/src/Specific/solinas32_2e205m45x2e198m1/freeze.c b/src/Specific/solinas32_2e205m45x2e198m1/freeze.c
new file mode 100644
index 000000000..f34680b5a
--- /dev/null
+++ b/src/Specific/solinas32_2e205m45x2e198m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the lines from here to the closing brace are not valid C (no opening brace, a declaration used as an expression, an unrendered Op Syntax.SubWithGetBorrow term); presumably the generator failed to emit this function - confirm and regenerate.
+out[0] = uint32_t x20;
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 21 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // looks like a pretty-printed intermediate term rather than C - TODO confirm against the generator
+out[2] = x2;
+out[3] = 0x1fffff;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e205m45x2e198m1/freeze.h b/src/Specific/solinas32_2e205m45x2e198m1/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas32_2e205m45x2e198m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: an output pointer followed by ten scalar uint64_t inputs.
diff --git a/src/Specific/solinas32_2e206m5/femul.c b/src/Specific/solinas32_2e206m5/femul.c
new file mode 100644
index 000000000..a3f804790
--- /dev/null
+++ b/src/Specific/solinas32_2e206m5/femul.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Writes out[0..11]. Every product below pairs one of the first twelve scalars (x24 ... x5) with one of the last twelve (x46 ... x27); presumably these are the limbs of two field elements - confirm against the generator.
+{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + ((0x2 * ((uint64_t)x21 * x33)) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))); // x48..x59: twelve 64-bit sums of cross products; some terms carry a factor 0x2, and from x49 on a second group is scaled by 0x5. The leading brace of this line doubles as the function body brace.
+{ uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x5 * ((uint64_t)x24 * x46)));
+{ uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0x5 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x5 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
+{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x5 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
+{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x5 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
+{ uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0x5 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
+{ uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x5 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
+{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x5 * (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
+{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x5 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x5 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x5 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29))))))))))))));
+{ uint32_t x60 = (uint32_t) (x59 >> 0x12); // Carry chain from x59 up to x48: each step splits a sum into a shifted high part (shift 0x12 or 0x11) and a masked low part (0x3ffff or 0x1ffff), then adds the high part to the next sum.
+{ uint32_t x61 = ((uint32_t)x59 & 0x3ffff);
+{ uint64_t x62 = (x60 + x58);
+{ uint32_t x63 = (uint32_t) (x62 >> 0x11);
+{ uint32_t x64 = ((uint32_t)x62 & 0x1ffff);
+{ uint64_t x65 = (x63 + x57);
+{ uint32_t x66 = (uint32_t) (x65 >> 0x11);
+{ uint32_t x67 = ((uint32_t)x65 & 0x1ffff);
+{ uint64_t x68 = (x66 + x56);
+{ uint32_t x69 = (uint32_t) (x68 >> 0x11);
+{ uint32_t x70 = ((uint32_t)x68 & 0x1ffff);
+{ uint64_t x71 = (x69 + x55);
+{ uint32_t x72 = (uint32_t) (x71 >> 0x11);
+{ uint32_t x73 = ((uint32_t)x71 & 0x1ffff);
+{ uint64_t x74 = (x72 + x54);
+{ uint32_t x75 = (uint32_t) (x74 >> 0x11);
+{ uint32_t x76 = ((uint32_t)x74 & 0x1ffff);
+{ uint64_t x77 = (x75 + x53);
+{ uint32_t x78 = (uint32_t) (x77 >> 0x12);
+{ uint32_t x79 = ((uint32_t)x77 & 0x3ffff);
+{ uint64_t x80 = (x78 + x52);
+{ uint32_t x81 = (uint32_t) (x80 >> 0x11);
+{ uint32_t x82 = ((uint32_t)x80 & 0x1ffff);
+{ uint64_t x83 = (x81 + x51);
+{ uint32_t x84 = (uint32_t) (x83 >> 0x11);
+{ uint32_t x85 = ((uint32_t)x83 & 0x1ffff);
+{ uint64_t x86 = (x84 + x50);
+{ uint32_t x87 = (uint32_t) (x86 >> 0x11);
+{ uint32_t x88 = ((uint32_t)x86 & 0x1ffff);
+{ uint64_t x89 = (x87 + x49);
+{ uint32_t x90 = (uint32_t) (x89 >> 0x11);
+{ uint32_t x91 = ((uint32_t)x89 & 0x1ffff);
+{ uint64_t x92 = (x90 + x48);
+{ uint32_t x93 = (uint32_t) (x92 >> 0x11);
+{ uint32_t x94 = ((uint32_t)x92 & 0x1ffff);
+{ uint32_t x95 = (x61 + (0x5 * x93)); // The final carry x93 is multiplied by 0x5 and added back onto x61 (the low part of x59); two more carry steps follow.
+{ uint32_t x96 = (x95 >> 0x12);
+{ uint32_t x97 = (x95 & 0x3ffff);
+{ uint32_t x98 = (x96 + x64);
+{ uint32_t x99 = (x98 >> 0x11);
+{ uint32_t x100 = (x98 & 0x1ffff);
+out[0] = x94; // Store order: out[0] derives from x48 and out[11] from x59.
+out[1] = x91;
+out[2] = x88;
+out[3] = x85;
+out[4] = x82;
+out[5] = x79;
+out[6] = x76;
+out[7] = x73;
+out[8] = x70;
+out[9] = x99 + x67; // the last carry is added without a further mask
+out[10] = x100;
+out[11] = x97;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e206m5/femul.h b/src/Specific/solinas32_2e206m5/femul.h
new file mode 100644
index 000000000..21cfc1a1b
--- /dev/null
+++ b/src/Specific/solinas32_2e206m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Declaration only: an output pointer followed by 24 scalar uint64_t inputs.
diff --git a/src/Specific/solinas32_2e206m5/fesquare.c b/src/Specific/solinas32_2e206m5/fesquare.c
new file mode 100644
index 000000000..fb8fba0cc
--- /dev/null
+++ b/src/Specific/solinas32_2e206m5/fesquare.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes out[0..11]. Every product below multiplies two of the twelve scalar inputs with each other; presumably these are the limbs of one field element being squared - confirm against the generator.
+{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))); // x23..x34: twelve 64-bit sums of products; some terms carry a factor 0x2, and from x24 on a second group is scaled by 0x5. The leading brace of this line doubles as the function body brace.
+{ uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x5 * ((uint64_t)x21 * x21)));
+{ uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x5 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x5 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
+{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x5 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
+{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + ((0x2 * ((uint64_t)x20 * x20)) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
+{ uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + ((0x2 * ((uint64_t)x18 * x20)) + ((0x2 * ((uint64_t)x20 * x18)) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x5 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
+{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x5 * (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x5 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x5 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x5 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4))))))))))))));
+{ uint32_t x35 = (uint32_t) (x34 >> 0x12); // Carry chain from x34 up to x23: each step splits a sum into a shifted high part (shift 0x12 or 0x11) and a masked low part (0x3ffff or 0x1ffff), then adds the high part to the next sum.
+{ uint32_t x36 = ((uint32_t)x34 & 0x3ffff);
+{ uint64_t x37 = (x35 + x33);
+{ uint32_t x38 = (uint32_t) (x37 >> 0x11);
+{ uint32_t x39 = ((uint32_t)x37 & 0x1ffff);
+{ uint64_t x40 = (x38 + x32);
+{ uint32_t x41 = (uint32_t) (x40 >> 0x11);
+{ uint32_t x42 = ((uint32_t)x40 & 0x1ffff);
+{ uint64_t x43 = (x41 + x31);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x11);
+{ uint32_t x45 = ((uint32_t)x43 & 0x1ffff);
+{ uint64_t x46 = (x44 + x30);
+{ uint32_t x47 = (uint32_t) (x46 >> 0x11);
+{ uint32_t x48 = ((uint32_t)x46 & 0x1ffff);
+{ uint64_t x49 = (x47 + x29);
+{ uint32_t x50 = (uint32_t) (x49 >> 0x11);
+{ uint32_t x51 = ((uint32_t)x49 & 0x1ffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x12);
+{ uint32_t x54 = ((uint32_t)x52 & 0x3ffff);
+{ uint64_t x55 = (x53 + x27);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x11);
+{ uint32_t x57 = ((uint32_t)x55 & 0x1ffff);
+{ uint64_t x58 = (x56 + x26);
+{ uint32_t x59 = (uint32_t) (x58 >> 0x11);
+{ uint32_t x60 = ((uint32_t)x58 & 0x1ffff);
+{ uint64_t x61 = (x59 + x25);
+{ uint32_t x62 = (uint32_t) (x61 >> 0x11);
+{ uint32_t x63 = ((uint32_t)x61 & 0x1ffff);
+{ uint64_t x64 = (x62 + x24);
+{ uint32_t x65 = (uint32_t) (x64 >> 0x11);
+{ uint32_t x66 = ((uint32_t)x64 & 0x1ffff);
+{ uint64_t x67 = (x65 + x23);
+{ uint32_t x68 = (uint32_t) (x67 >> 0x11);
+{ uint32_t x69 = ((uint32_t)x67 & 0x1ffff);
+{ uint32_t x70 = (x36 + (0x5 * x68)); // The final carry x68 is multiplied by 0x5 and added back onto x36 (the low part of x34); two more carry steps follow.
+{ uint32_t x71 = (x70 >> 0x12);
+{ uint32_t x72 = (x70 & 0x3ffff);
+{ uint32_t x73 = (x71 + x39);
+{ uint32_t x74 = (x73 >> 0x11);
+{ uint32_t x75 = (x73 & 0x1ffff);
+out[0] = x69; // Store order: out[0] derives from x23 and out[11] from x34.
+out[1] = x66;
+out[2] = x63;
+out[3] = x60;
+out[4] = x57;
+out[5] = x54;
+out[6] = x51;
+out[7] = x48;
+out[8] = x45;
+out[9] = x74 + x42; // the last carry is added without a further mask
+out[10] = x75;
+out[11] = x72;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e206m5/fesquare.h b/src/Specific/solinas32_2e206m5/fesquare.h
new file mode 100644
index 000000000..5d10fa419
--- /dev/null
+++ b/src/Specific/solinas32_2e206m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: an output pointer followed by twelve scalar uint64_t inputs.
diff --git a/src/Specific/solinas32_2e206m5/freeze.c b/src/Specific/solinas32_2e206m5/freeze.c
new file mode 100644
index 000000000..77a911340
--- /dev/null
+++ b/src/Specific/solinas32_2e206m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the lines from here to the closing brace are not valid C (no opening brace, a declaration used as an expression, an unrendered Op Syntax.SubWithGetBorrow term); presumably the generator failed to emit this function - confirm and regenerate.
+out[0] = uint32_t x24;
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 18 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // looks like a pretty-printed intermediate term rather than C - TODO confirm against the generator
+out[2] = x2;
+out[3] = 0x3fffb;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e206m5/freeze.h b/src/Specific/solinas32_2e206m5/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e206m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: an output pointer followed by twelve scalar uint64_t inputs.
diff --git a/src/Specific/solinas32_2e212m29/femul.c b/src/Specific/solinas32_2e212m29/femul.c
new file mode 100644
index 000000000..56a5f72be
--- /dev/null
+++ b/src/Specific/solinas32_2e212m29/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Writes out[0..7]. Every product below pairs one of the first eight scalars (x16 ... x5) with one of the last eight (x30 ... x19); presumably these are the limbs of two field elements - confirm against the generator.
+{ uint64_t x32 = (((uint64_t)x5 * x30) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + (((uint64_t)x17 * x21) + ((uint64_t)x16 * x19)))))))); // x32..x39: eight sums of cross products; some terms carry a factor 0x2, and from x33 on a second group is scaled by 0x1d. The leading brace of this line doubles as the function body brace.
+{ uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + ((0x2 * ((uint64_t)x11 * x25)) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x1d * (0x2 * ((uint64_t)x16 * x30))));
+{ uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x1d * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
+{ uint64_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + (((uint64_t)x9 * x23) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + (0x1d * ((0x2 * ((uint64_t)x15 * x30)) + (((uint64_t)x17 * x31) + (0x2 * ((uint64_t)x16 * x29))))));
+{ uint64_t x36 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) + (0x1d * (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27))))));
+{ ℤ x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x11 * x30)) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + (0x2 * ((uint64_t)x16 * x25)))))))); // NOTE(review): the type is printed as ℤ and the operators as +ℤ and *ℤ, which is not C (x39 and x45 below have the same problem); presumably the generator could not assign a fixed-width type here - confirm before compiling.
+{ uint64_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (0x1d * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
+{ ℤ x39 = (((uint64_t)x5 * x19) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x7 * x30)) + (((uint64_t)x9 * x31) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + (((uint64_t)x17 * x23) + (0x2 * ((uint64_t)x16 * x21))))))))));
+{ uint64_t x40 = (x39 >> 0x1b); // Carry chain from x39 up to x32: each step splits a sum into a shifted high part (shift 0x1b or 0x1a) and a masked low part (0x7ffffff or 0x3ffffff), then adds the high part to the next sum.
+{ uint32_t x41 = (x39 & 0x7ffffff);
+{ uint64_t x42 = (x40 + x38);
+{ uint64_t x43 = (x42 >> 0x1a);
+{ uint32_t x44 = ((uint32_t)x42 & 0x3ffffff);
+{ ℤ x45 = (x43 +ℤ x37);
+{ uint64_t x46 = (x45 >> 0x1b);
+{ uint32_t x47 = (x45 & 0x7ffffff);
+{ uint64_t x48 = (x46 + x36);
+{ uint64_t x49 = (x48 >> 0x1a);
+{ uint32_t x50 = ((uint32_t)x48 & 0x3ffffff);
+{ uint64_t x51 = (x49 + x35);
+{ uint64_t x52 = (x51 >> 0x1b);
+{ uint32_t x53 = ((uint32_t)x51 & 0x7ffffff);
+{ uint64_t x54 = (x52 + x34);
+{ uint64_t x55 = (x54 >> 0x1a);
+{ uint32_t x56 = ((uint32_t)x54 & 0x3ffffff);
+{ uint64_t x57 = (x55 + x33);
+{ uint64_t x58 = (x57 >> 0x1b);
+{ uint32_t x59 = ((uint32_t)x57 & 0x7ffffff);
+{ uint64_t x60 = (x58 + x32);
+{ uint64_t x61 = (x60 >> 0x1a);
+{ uint32_t x62 = ((uint32_t)x60 & 0x3ffffff);
+{ uint64_t x63 = (x41 + (0x1d * x61)); // The final carry x61 is multiplied by 0x1d and added back onto x41 (the low part of x39); two more carry steps follow.
+{ uint32_t x64 = (uint32_t) (x63 >> 0x1b);
+{ uint32_t x65 = ((uint32_t)x63 & 0x7ffffff);
+{ uint32_t x66 = (x64 + x44);
+{ uint32_t x67 = (x66 >> 0x1a);
+{ uint32_t x68 = (x66 & 0x3ffffff);
+out[0] = x62; // Store order: out[0] derives from x32 and out[7] from x39.
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // the last carry is added without a further mask
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e212m29/femul.h b/src/Specific/solinas32_2e212m29/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas32_2e212m29/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Declaration only: an output pointer followed by 16 scalar uint64_t inputs.
diff --git a/src/Specific/solinas32_2e212m29/fesquare.c b/src/Specific/solinas32_2e212m29/fesquare.c
new file mode 100644
index 000000000..a7ef68a8d
--- /dev/null
+++ b/src/Specific/solinas32_2e212m29/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes out[0..7]. Every product below multiplies two of the eight scalar inputs with each other; presumably these are the limbs of one field element being squared - confirm against the generator.
+{ uint64_t x15 = (((uint64_t)x2 * x13) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x13 * x2)))))))); // x15..x22: eight sums of products; some terms carry a factor 0x2, and from x16 on a second group is scaled by 0x1d. The leading brace of this line doubles as the function body brace.
+{ uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x1d * (0x2 * ((uint64_t)x13 * x13))));
+{ uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x1d * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
+{ uint64_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x1d * ((0x2 * ((uint64_t)x12 * x13)) + (((uint64_t)x14 * x14) + (0x2 * ((uint64_t)x13 * x12))))));
+{ uint64_t x19 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x1d * (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10))))));
+{ ℤ x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x8 * x13)) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (0x2 * ((uint64_t)x13 * x8)))))))); // NOTE(review): the type is printed as ℤ and the operators as +ℤ and *ℤ, which is not C (x22 and x28 below have the same problem); presumably the generator could not assign a fixed-width type here - confirm before compiling.
+{ uint64_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1d * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
+{ ℤ x22 = (((uint64_t)x2 * x2) +ℤ (0x1d *ℤ ((0x2 * ((uint64_t)x4 * x13)) + (((uint64_t)x6 * x14) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + (((uint64_t)x14 * x6) + (0x2 * ((uint64_t)x13 * x4))))))))));
+{ uint64_t x23 = (x22 >> 0x1b); // Carry chain from x22 up to x15: each step splits a sum into a shifted high part (shift 0x1b or 0x1a) and a masked low part (0x7ffffff or 0x3ffffff), then adds the high part to the next sum.
+{ uint32_t x24 = (x22 & 0x7ffffff);
+{ uint64_t x25 = (x23 + x21);
+{ uint64_t x26 = (x25 >> 0x1a);
+{ uint32_t x27 = ((uint32_t)x25 & 0x3ffffff);
+{ ℤ x28 = (x26 +ℤ x20);
+{ uint64_t x29 = (x28 >> 0x1b);
+{ uint32_t x30 = (x28 & 0x7ffffff);
+{ uint64_t x31 = (x29 + x19);
+{ uint64_t x32 = (x31 >> 0x1a);
+{ uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
+{ uint64_t x34 = (x32 + x18);
+{ uint64_t x35 = (x34 >> 0x1b);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+{ uint64_t x37 = (x35 + x17);
+{ uint64_t x38 = (x37 >> 0x1a);
+{ uint32_t x39 = ((uint32_t)x37 & 0x3ffffff);
+{ uint64_t x40 = (x38 + x16);
+{ uint64_t x41 = (x40 >> 0x1b);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
+{ uint64_t x43 = (x41 + x15);
+{ uint64_t x44 = (x43 >> 0x1a);
+{ uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
+{ uint64_t x46 = (x24 + (0x1d * x44)); // The final carry x44 is multiplied by 0x1d and added back onto x24 (the low part of x22); two more carry steps follow.
+{ uint32_t x47 = (uint32_t) (x46 >> 0x1b);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
+{ uint32_t x49 = (x47 + x27);
+{ uint32_t x50 = (x49 >> 0x1a);
+{ uint32_t x51 = (x49 & 0x3ffffff);
+out[0] = x45; // Store order: out[0] derives from x15 and out[7] from x22.
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // the last carry is added without a further mask
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e212m29/fesquare.h b/src/Specific/solinas32_2e212m29/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas32_2e212m29/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: an output pointer followed by eight scalar uint64_t inputs.
diff --git a/src/Specific/solinas32_2e212m29/freeze.c b/src/Specific/solinas32_2e212m29/freeze.c
new file mode 100644
index 000000000..0e748bcc8
--- /dev/null
+++ b/src/Specific/solinas32_2e212m29/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the lines from here to the closing brace are not valid C (no opening brace, a declaration used as an expression, an unrendered Op Syntax.SubWithGetBorrow term); presumably the generator failed to emit this function - confirm and regenerate.
+out[0] = uint32_t x16;
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // looks like a pretty-printed intermediate term rather than C - TODO confirm against the generator
+out[2] = x2;
+out[3] = 0x7ffffe3;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e212m29/freeze.h b/src/Specific/solinas32_2e212m29/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas32_2e212m29/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: an output pointer followed by eight scalar uint64_t inputs.
diff --git a/src/Specific/solinas32_2e213m3/femul.c b/src/Specific/solinas32_2e213m3/femul.c
new file mode 100644
index 000000000..4d09e2dc4
--- /dev/null
+++ b/src/Specific/solinas32_2e213m3/femul.c
@@ -0,0 +1,101 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33)
+{ uint64_t x60 = (((uint64_t)x5 * x58) + (((uint64_t)0x2 * (x7 * x59)) + (((uint64_t)0x2 * (x9 * x57)) + (((uint64_t)0x2 * (x11 * x55)) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)0x2 * (x17 * x49)) + (((uint64_t)0x2 * (x19 * x47)) + (((uint64_t)0x2 * (x21 * x45)) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)0x2 * (x27 * x39)) + (((uint64_t)0x2 * (x29 * x37)) + (((uint64_t)0x2 * (x31 * x35)) + ((uint64_t)x30 * x33)))))))))))))));
+{ uint64_t x61 = ((((uint64_t)x5 * x59) + (((uint64_t)0x2 * (x7 * x57)) + (((uint64_t)0x2 * (x9 * x55)) + (((uint64_t)x11 * x53) + ((x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)0x2 * (x17 * x47)) + (((uint64_t)0x2 * (x19 * x45)) + (((uint64_t)x21 * x43) + ((x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)0x2 * (x27 * x37)) + (((uint64_t)0x2 * (x29 * x35)) + ((uint64_t)x31 * x33)))))))))))))) + ((uint64_t)0x3 * (x30 * x58)));
+{ uint64_t x62 = ((((uint64_t)x5 * x57) + (((uint64_t)0x2 * (x7 * x55)) + (((uint64_t)x9 * x53) + ((x11 * x51) + ((x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)0x2 * (x17 * x45)) + (((uint64_t)x19 * x43) + ((x21 * x41) + ((x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)0x2 * (x27 * x35)) + ((uint64_t)x29 * x33))))))))))))) + (0x3 * ((uint64_t)(x31 * x58) + (x30 * x59))));
+{ uint64_t x63 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + ((x9 * x51) + ((x11 * x49) + ((x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((x19 * x41) + ((x21 * x39) + ((x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x27 * x33)))))))))))) + (0x3 * ((x29 * x58) + ((uint64_t)(x31 * x59) + (x30 * x57)))));
+{ uint64_t x64 = ((((uint64_t)x5 * x53) + (((uint64_t)0x2 * (x7 * x51)) + (((uint64_t)0x2 * (x9 * x49)) + (((uint64_t)0x2 * (x11 * x47)) + (((uint64_t)0x2 * (x13 * x45)) + (((uint64_t)x15 * x43) + (((uint64_t)0x2 * (x17 * x41)) + (((uint64_t)0x2 * (x19 * x39)) + (((uint64_t)0x2 * (x21 * x37)) + (((uint64_t)0x2 * (x23 * x35)) + ((uint64_t)x25 * x33))))))))))) + (0x3 * (((uint64_t)0x2 * (x27 * x58)) + (((uint64_t)0x2 * (x29 * x59)) + (((uint64_t)0x2 * (x31 * x57)) + ((uint64_t)0x2 * (x30 * x55)))))));
+{ uint64_t x65 = ((((uint64_t)x5 * x51) + (((uint64_t)0x2 * (x7 * x49)) + (((uint64_t)0x2 * (x9 * x47)) + (((uint64_t)0x2 * (x11 * x45)) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)0x2 * (x17 * x39)) + (((uint64_t)0x2 * (x19 * x37)) + (((uint64_t)0x2 * (x21 * x35)) + ((uint64_t)x23 * x33)))))))))) + (0x3 * (((uint64_t)x25 * x58) + (((uint64_t)0x2 * (x27 * x59)) + (((uint64_t)0x2 * (x29 * x57)) + (((uint64_t)0x2 * (x31 * x55)) + ((uint64_t)x30 * x53)))))));
+{ uint64_t x66 = ((((uint64_t)x5 * x49) + (((uint64_t)0x2 * (x7 * x47)) + (((uint64_t)0x2 * (x9 * x45)) + (((uint64_t)x11 * x43) + ((x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)0x2 * (x17 * x37)) + (((uint64_t)0x2 * (x19 * x35)) + ((uint64_t)x21 * x33))))))))) + (0x3 * ((x23 * x58) + (((uint64_t)x25 * x59) + (((uint64_t)0x2 * (x27 * x57)) + (((uint64_t)0x2 * (x29 * x55)) + (((uint64_t)x31 * x53) + (x30 * x51))))))));
+{ uint64_t x67 = ((((uint64_t)x5 * x47) + (((uint64_t)0x2 * (x7 * x45)) + (((uint64_t)x9 * x43) + ((x11 * x41) + ((x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)0x2 * (x17 * x35)) + ((uint64_t)x19 * x33)))))))) + (0x3 * ((x21 * x58) + ((x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)0x2 * (x27 * x55)) + (((uint64_t)x29 * x53) + ((uint64_t)(x31 * x51) + (x30 * x49)))))))));
+{ uint64_t x68 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + ((x9 * x41) + ((x11 * x39) + ((x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33))))))) + (0x3 * ((x19 * x58) + ((x21 * x59) + ((x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((x29 * x51) + ((uint64_t)(x31 * x49) + (x30 * x47))))))))));
+{ uint64_t x69 = ((((uint64_t)x5 * x43) + (((uint64_t)0x2 * (x7 * x41)) + (((uint64_t)0x2 * (x9 * x39)) + (((uint64_t)0x2 * (x11 * x37)) + (((uint64_t)0x2 * (x13 * x35)) + ((uint64_t)x15 * x33)))))) + (0x3 * (((uint64_t)0x2 * (x17 * x58)) + (((uint64_t)0x2 * (x19 * x59)) + (((uint64_t)0x2 * (x21 * x57)) + (((uint64_t)0x2 * (x23 * x55)) + (((uint64_t)x25 * x53) + (((uint64_t)0x2 * (x27 * x51)) + (((uint64_t)0x2 * (x29 * x49)) + (((uint64_t)0x2 * (x31 * x47)) + ((uint64_t)0x2 * (x30 * x45))))))))))));
+{ uint64_t x70 = ((((uint64_t)x5 * x41) + (((uint64_t)0x2 * (x7 * x39)) + (((uint64_t)0x2 * (x9 * x37)) + (((uint64_t)0x2 * (x11 * x35)) + ((uint64_t)x13 * x33))))) + (0x3 * (((uint64_t)x15 * x58) + (((uint64_t)0x2 * (x17 * x59)) + (((uint64_t)0x2 * (x19 * x57)) + (((uint64_t)0x2 * (x21 * x55)) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)0x2 * (x27 * x49)) + (((uint64_t)0x2 * (x29 * x47)) + (((uint64_t)0x2 * (x31 * x45)) + ((uint64_t)x30 * x43))))))))))));
+{ uint64_t x71 = ((((uint64_t)x5 * x39) + (((uint64_t)0x2 * (x7 * x37)) + (((uint64_t)0x2 * (x9 * x35)) + ((uint64_t)x11 * x33)))) + (0x3 * ((x13 * x58) + (((uint64_t)x15 * x59) + (((uint64_t)0x2 * (x17 * x57)) + (((uint64_t)0x2 * (x19 * x55)) + (((uint64_t)x21 * x53) + ((x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)0x2 * (x27 * x47)) + (((uint64_t)0x2 * (x29 * x45)) + (((uint64_t)x31 * x43) + (x30 * x41)))))))))))));
+{ uint64_t x72 = ((((uint64_t)x5 * x37) + (((uint64_t)0x2 * (x7 * x35)) + ((uint64_t)x9 * x33))) + (0x3 * ((x11 * x58) + ((x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)0x2 * (x17 * x55)) + (((uint64_t)x19 * x53) + ((x21 * x51) + ((x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)0x2 * (x27 * x45)) + (((uint64_t)x29 * x43) + ((uint64_t)(x31 * x41) + (x30 * x39))))))))))))));
+{ uint64_t x73 = ((((uint64_t)x5 * x35) + ((uint64_t)x7 * x33)) + (0x3 * ((x9 * x58) + ((x11 * x59) + ((x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + ((x19 * x51) + ((x21 * x49) + ((x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + ((x29 * x41) + ((uint64_t)(x31 * x39) + (x30 * x37)))))))))))))));
+{ uint64_t x74 = (((uint64_t)x5 * x33) + (0x3 * (((uint64_t)0x2 * (x7 * x58)) + (((uint64_t)0x2 * (x9 * x59)) + (((uint64_t)0x2 * (x11 * x57)) + (((uint64_t)0x2 * (x13 * x55)) + (((uint64_t)x15 * x53) + (((uint64_t)0x2 * (x17 * x51)) + (((uint64_t)0x2 * (x19 * x49)) + (((uint64_t)0x2 * (x21 * x47)) + (((uint64_t)0x2 * (x23 * x45)) + (((uint64_t)x25 * x43) + (((uint64_t)0x2 * (x27 * x41)) + (((uint64_t)0x2 * (x29 * x39)) + (((uint64_t)0x2 * (x31 * x37)) + ((uint64_t)0x2 * (x30 * x35)))))))))))))))));
+{ uint32_t x75 = (uint32_t) (x74 >> 0xf);
+{ uint32_t x76 = ((uint32_t)x74 & 0x7fff);
+{ uint64_t x77 = (x75 + x73);
+{ uint32_t x78 = (uint32_t) (x77 >> 0xe);
+{ uint32_t x79 = ((uint32_t)x77 & 0x3fff);
+{ uint64_t x80 = (x78 + x72);
+{ uint32_t x81 = (uint32_t) (x80 >> 0xe);
+{ uint32_t x82 = ((uint32_t)x80 & 0x3fff);
+{ uint64_t x83 = (x81 + x71);
+{ uint32_t x84 = (uint32_t) (x83 >> 0xe);
+{ uint32_t x85 = ((uint32_t)x83 & 0x3fff);
+{ uint64_t x86 = (x84 + x70);
+{ uint32_t x87 = (uint32_t) (x86 >> 0xe);
+{ uint32_t x88 = ((uint32_t)x86 & 0x3fff);
+{ uint64_t x89 = (x87 + x69);
+{ uint32_t x90 = (uint32_t) (x89 >> 0xf);
+{ uint32_t x91 = ((uint32_t)x89 & 0x7fff);
+{ uint64_t x92 = (x90 + x68);
+{ uint32_t x93 = (uint32_t) (x92 >> 0xe);
+{ uint32_t x94 = ((uint32_t)x92 & 0x3fff);
+{ uint64_t x95 = (x93 + x67);
+{ uint32_t x96 = (uint32_t) (x95 >> 0xe);
+{ uint32_t x97 = ((uint32_t)x95 & 0x3fff);
+{ uint64_t x98 = (x96 + x66);
+{ uint32_t x99 = (uint32_t) (x98 >> 0xe);
+{ uint32_t x100 = ((uint32_t)x98 & 0x3fff);
+{ uint64_t x101 = (x99 + x65);
+{ uint32_t x102 = (uint32_t) (x101 >> 0xe);
+{ uint32_t x103 = ((uint32_t)x101 & 0x3fff);
+{ uint64_t x104 = (x102 + x64);
+{ uint32_t x105 = (uint32_t) (x104 >> 0xf);
+{ uint32_t x106 = ((uint32_t)x104 & 0x7fff);
+{ uint64_t x107 = (x105 + x63);
+{ uint32_t x108 = (uint32_t) (x107 >> 0xe);
+{ uint32_t x109 = ((uint32_t)x107 & 0x3fff);
+{ uint64_t x110 = (x108 + x62);
+{ uint32_t x111 = (uint32_t) (x110 >> 0xe);
+{ uint32_t x112 = ((uint32_t)x110 & 0x3fff);
+{ uint64_t x113 = (x111 + x61);
+{ uint32_t x114 = (uint32_t) (x113 >> 0xe);
+{ uint32_t x115 = ((uint32_t)x113 & 0x3fff);
+{ uint64_t x116 = (x114 + x60);
+{ uint32_t x117 = (uint32_t) (x116 >> 0xe);
+{ uint32_t x118 = ((uint32_t)x116 & 0x3fff);
+{ uint32_t x119 = (x76 + (0x3 * x117));
+{ uint32_t x120 = (x119 >> 0xf);
+{ uint32_t x121 = (x119 & 0x7fff);
+{ uint32_t x122 = (x120 + x79);
+{ uint32_t x123 = (x122 >> 0xe);
+{ uint32_t x124 = (x122 & 0x3fff);
+out[0] = x118;
+out[1] = x115;
+out[2] = x112;
+out[3] = x109;
+out[4] = x106;
+out[5] = x103;
+out[6] = x100;
+out[7] = x97;
+out[8] = x94;
+out[9] = x91;
+out[10] = x88;
+out[11] = x85;
+out[12] = x123 + x82;
+out[13] = x124;
+out[14] = x121;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[15];
diff --git a/src/Specific/solinas32_2e213m3/femul.h b/src/Specific/solinas32_2e213m3/femul.h
new file mode 100644
index 000000000..5d9164651
--- /dev/null
+++ b/src/Specific/solinas32_2e213m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33);
diff --git a/src/Specific/solinas32_2e213m3/fesquare.c b/src/Specific/solinas32_2e213m3/fesquare.c
new file mode 100644
index 000000000..70a9e562d
--- /dev/null
+++ b/src/Specific/solinas32_2e213m3/fesquare.c
@@ -0,0 +1,101 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x29 = (((uint64_t)x2 * x27) + (((uint64_t)0x2 * (x4 * x28)) + (((uint64_t)0x2 * (x6 * x26)) + (((uint64_t)0x2 * (x8 * x24)) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)0x2 * (x14 * x18)) + (((uint64_t)0x2 * (x16 * x16)) + (((uint64_t)0x2 * (x18 * x14)) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)0x2 * (x24 * x8)) + (((uint64_t)0x2 * (x26 * x6)) + (((uint64_t)0x2 * (x28 * x4)) + ((uint64_t)x27 * x2)))))))))))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x28) + (((uint64_t)0x2 * (x4 * x26)) + (((uint64_t)0x2 * (x6 * x24)) + (((uint64_t)x8 * x22) + ((x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)0x2 * (x14 * x16)) + (((uint64_t)0x2 * (x16 * x14)) + (((uint64_t)x18 * x12) + ((x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)0x2 * (x24 * x6)) + (((uint64_t)0x2 * (x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + ((uint64_t)0x3 * (x27 * x27)));
+{ uint64_t x31 = ((((uint64_t)x2 * x26) + (((uint64_t)0x2 * (x4 * x24)) + (((uint64_t)x6 * x22) + ((x8 * x20) + ((x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)0x2 * (x14 * x14)) + (((uint64_t)x16 * x12) + ((x18 * x10) + ((x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)0x2 * (x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x3 * ((uint64_t)(x28 * x27) + (x27 * x28))));
+{ uint64_t x32 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + ((x6 * x20) + ((x8 * x18) + ((x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((x16 * x10) + ((x18 * x8) + ((x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x3 * ((x26 * x27) + ((uint64_t)(x28 * x28) + (x27 * x26)))));
+{ uint64_t x33 = ((((uint64_t)x2 * x22) + (((uint64_t)0x2 * (x4 * x20)) + (((uint64_t)0x2 * (x6 * x18)) + (((uint64_t)0x2 * (x8 * x16)) + (((uint64_t)0x2 * (x10 * x14)) + (((uint64_t)x12 * x12) + (((uint64_t)0x2 * (x14 * x10)) + (((uint64_t)0x2 * (x16 * x8)) + (((uint64_t)0x2 * (x18 * x6)) + (((uint64_t)0x2 * (x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x3 * (((uint64_t)0x2 * (x24 * x27)) + (((uint64_t)0x2 * (x26 * x28)) + (((uint64_t)0x2 * (x28 * x26)) + ((uint64_t)0x2 * (x27 * x24)))))));
+{ uint64_t x34 = ((((uint64_t)x2 * x20) + (((uint64_t)0x2 * (x4 * x18)) + (((uint64_t)0x2 * (x6 * x16)) + (((uint64_t)0x2 * (x8 * x14)) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)0x2 * (x14 * x8)) + (((uint64_t)0x2 * (x16 * x6)) + (((uint64_t)0x2 * (x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x27) + (((uint64_t)0x2 * (x24 * x28)) + (((uint64_t)0x2 * (x26 * x26)) + (((uint64_t)0x2 * (x28 * x24)) + ((uint64_t)x27 * x22)))))));
+{ uint64_t x35 = ((((uint64_t)x2 * x18) + (((uint64_t)0x2 * (x4 * x16)) + (((uint64_t)0x2 * (x6 * x14)) + (((uint64_t)x8 * x12) + ((x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)0x2 * (x14 * x6)) + (((uint64_t)0x2 * (x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * ((x20 * x27) + (((uint64_t)x22 * x28) + (((uint64_t)0x2 * (x24 * x26)) + (((uint64_t)0x2 * (x26 * x24)) + (((uint64_t)x28 * x22) + (x27 * x20))))))));
+{ uint64_t x36 = ((((uint64_t)x2 * x16) + (((uint64_t)0x2 * (x4 * x14)) + (((uint64_t)x6 * x12) + ((x8 * x10) + ((x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)0x2 * (x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x3 * ((x18 * x27) + ((x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)0x2 * (x24 * x24)) + (((uint64_t)x26 * x22) + ((uint64_t)(x28 * x20) + (x27 * x18)))))))));
+{ uint64_t x37 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + ((x6 * x10) + ((x8 * x8) + ((x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x3 * ((x16 * x27) + ((x18 * x28) + ((x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + ((x26 * x20) + ((uint64_t)(x28 * x18) + (x27 * x16))))))))));
+{ uint64_t x38 = ((((uint64_t)x2 * x12) + (((uint64_t)0x2 * (x4 * x10)) + (((uint64_t)0x2 * (x6 * x8)) + (((uint64_t)0x2 * (x8 * x6)) + (((uint64_t)0x2 * (x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)0x2 * (x14 * x27)) + (((uint64_t)0x2 * (x16 * x28)) + (((uint64_t)0x2 * (x18 * x26)) + (((uint64_t)0x2 * (x20 * x24)) + (((uint64_t)x22 * x22) + (((uint64_t)0x2 * (x24 * x20)) + (((uint64_t)0x2 * (x26 * x18)) + (((uint64_t)0x2 * (x28 * x16)) + ((uint64_t)0x2 * (x27 * x14))))))))))));
+{ uint64_t x39 = ((((uint64_t)x2 * x10) + (((uint64_t)0x2 * (x4 * x8)) + (((uint64_t)0x2 * (x6 * x6)) + (((uint64_t)0x2 * (x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x27) + (((uint64_t)0x2 * (x14 * x28)) + (((uint64_t)0x2 * (x16 * x26)) + (((uint64_t)0x2 * (x18 * x24)) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)0x2 * (x24 * x18)) + (((uint64_t)0x2 * (x26 * x16)) + (((uint64_t)0x2 * (x28 * x14)) + ((uint64_t)x27 * x12))))))))))));
+{ uint64_t x40 = ((((uint64_t)x2 * x8) + (((uint64_t)0x2 * (x4 * x6)) + (((uint64_t)0x2 * (x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * ((x10 * x27) + (((uint64_t)x12 * x28) + (((uint64_t)0x2 * (x14 * x26)) + (((uint64_t)0x2 * (x16 * x24)) + (((uint64_t)x18 * x22) + ((x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)0x2 * (x24 * x16)) + (((uint64_t)0x2 * (x26 * x14)) + (((uint64_t)x28 * x12) + (x27 * x10)))))))))))));
+{ uint64_t x41 = ((((uint64_t)x2 * x6) + (((uint64_t)0x2 * (x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * ((x8 * x27) + ((x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)0x2 * (x14 * x24)) + (((uint64_t)x16 * x22) + ((x18 * x20) + ((x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)0x2 * (x24 * x14)) + (((uint64_t)x26 * x12) + ((uint64_t)(x28 * x10) + (x27 * x8))))))))))))));
+{ uint64_t x42 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * ((x6 * x27) + ((x8 * x28) + ((x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + ((x16 * x20) + ((x18 * x18) + ((x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + ((x26 * x10) + ((uint64_t)(x28 * x8) + (x27 * x6)))))))))))))));
+{ uint64_t x43 = (((uint64_t)x2 * x2) + (0x3 * (((uint64_t)0x2 * (x4 * x27)) + (((uint64_t)0x2 * (x6 * x28)) + (((uint64_t)0x2 * (x8 * x26)) + (((uint64_t)0x2 * (x10 * x24)) + (((uint64_t)x12 * x22) + (((uint64_t)0x2 * (x14 * x20)) + (((uint64_t)0x2 * (x16 * x18)) + (((uint64_t)0x2 * (x18 * x16)) + (((uint64_t)0x2 * (x20 * x14)) + (((uint64_t)x22 * x12) + (((uint64_t)0x2 * (x24 * x10)) + (((uint64_t)0x2 * (x26 * x8)) + (((uint64_t)0x2 * (x28 * x6)) + ((uint64_t)0x2 * (x27 * x4)))))))))))))))));
+{ uint32_t x44 = (uint32_t) (x43 >> 0xf);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7fff);
+{ uint64_t x46 = (x44 + x42);
+{ uint32_t x47 = (uint32_t) (x46 >> 0xe);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3fff);
+{ uint64_t x49 = (x47 + x41);
+{ uint32_t x50 = (uint32_t) (x49 >> 0xe);
+{ uint32_t x51 = ((uint32_t)x49 & 0x3fff);
+{ uint64_t x52 = (x50 + x40);
+{ uint32_t x53 = (uint32_t) (x52 >> 0xe);
+{ uint32_t x54 = ((uint32_t)x52 & 0x3fff);
+{ uint64_t x55 = (x53 + x39);
+{ uint32_t x56 = (uint32_t) (x55 >> 0xe);
+{ uint32_t x57 = ((uint32_t)x55 & 0x3fff);
+{ uint64_t x58 = (x56 + x38);
+{ uint32_t x59 = (uint32_t) (x58 >> 0xf);
+{ uint32_t x60 = ((uint32_t)x58 & 0x7fff);
+{ uint64_t x61 = (x59 + x37);
+{ uint32_t x62 = (uint32_t) (x61 >> 0xe);
+{ uint32_t x63 = ((uint32_t)x61 & 0x3fff);
+{ uint64_t x64 = (x62 + x36);
+{ uint32_t x65 = (uint32_t) (x64 >> 0xe);
+{ uint32_t x66 = ((uint32_t)x64 & 0x3fff);
+{ uint64_t x67 = (x65 + x35);
+{ uint32_t x68 = (uint32_t) (x67 >> 0xe);
+{ uint32_t x69 = ((uint32_t)x67 & 0x3fff);
+{ uint64_t x70 = (x68 + x34);
+{ uint32_t x71 = (uint32_t) (x70 >> 0xe);
+{ uint32_t x72 = ((uint32_t)x70 & 0x3fff);
+{ uint64_t x73 = (x71 + x33);
+{ uint32_t x74 = (uint32_t) (x73 >> 0xf);
+{ uint32_t x75 = ((uint32_t)x73 & 0x7fff);
+{ uint64_t x76 = (x74 + x32);
+{ uint32_t x77 = (uint32_t) (x76 >> 0xe);
+{ uint32_t x78 = ((uint32_t)x76 & 0x3fff);
+{ uint64_t x79 = (x77 + x31);
+{ uint32_t x80 = (uint32_t) (x79 >> 0xe);
+{ uint32_t x81 = ((uint32_t)x79 & 0x3fff);
+{ uint64_t x82 = (x80 + x30);
+{ uint32_t x83 = (uint32_t) (x82 >> 0xe);
+{ uint32_t x84 = ((uint32_t)x82 & 0x3fff);
+{ uint64_t x85 = (x83 + x29);
+{ uint32_t x86 = (uint32_t) (x85 >> 0xe);
+{ uint32_t x87 = ((uint32_t)x85 & 0x3fff);
+{ uint32_t x88 = (x45 + (0x3 * x86));
+{ uint32_t x89 = (x88 >> 0xf);
+{ uint32_t x90 = (x88 & 0x7fff);
+{ uint32_t x91 = (x89 + x48);
+{ uint32_t x92 = (x91 >> 0xe);
+{ uint32_t x93 = (x91 & 0x3fff);
+out[0] = x87;
+out[1] = x84;
+out[2] = x81;
+out[3] = x78;
+out[4] = x75;
+out[5] = x72;
+out[6] = x69;
+out[7] = x66;
+out[8] = x63;
+out[9] = x60;
+out[10] = x57;
+out[11] = x54;
+out[12] = x92 + x51;
+out[13] = x93;
+out[14] = x90;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[15];
diff --git a/src/Specific/solinas32_2e213m3/fesquare.h b/src/Specific/solinas32_2e213m3/fesquare.h
new file mode 100644
index 000000000..900a6956e
--- /dev/null
+++ b/src/Specific/solinas32_2e213m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e213m3/freeze.c b/src/Specific/solinas32_2e213m3/freeze.c
new file mode 100644
index 000000000..595021a15
--- /dev/null
+++ b/src/Specific/solinas32_2e213m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x30;
+out[1] = uint8_t x31 = Op Syntax.SubWithGetBorrow 15 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x7ffd;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e213m3/freeze.h b/src/Specific/solinas32_2e213m3/freeze.h
new file mode 100644
index 000000000..ffbccdea2
--- /dev/null
+++ b/src/Specific/solinas32_2e213m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e216m2e108m1/femul.c b/src/Specific/solinas32_2e216m2e108m1/femul.c
new file mode 100644
index 000000000..2cd20ddc6
--- /dev/null
+++ b/src/Specific/solinas32_2e216m2e108m1/femul.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)
+{ uint64_t x32 = (((uint64_t)(x11 + x16) * (x25 + x30)) - ((uint64_t)x11 * x25));
+{ uint64_t x33 = ((((uint64_t)(x9 + x17) * (x25 + x30)) + ((uint64_t)(x11 + x16) * (x23 + x31))) - (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)));
+{ uint64_t x34 = ((((uint64_t)(x7 + x15) * (x25 + x30)) + (((uint64_t)(x9 + x17) * (x23 + x31)) + ((uint64_t)(x11 + x16) * (x21 + x29)))) - (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21))));
+{ uint64_t x35 = ((((uint64_t)(x5 + x13) * (x25 + x30)) + (((uint64_t)(x7 + x15) * (x23 + x31)) + (((uint64_t)(x9 + x17) * (x21 + x29)) + ((uint64_t)(x11 + x16) * (x19 + x27))))) - (((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))));
+{ uint64_t x36 = ((((uint64_t)(x5 + x13) * (x23 + x31)) + (((uint64_t)(x7 + x15) * (x21 + x29)) + ((uint64_t)(x9 + x17) * (x19 + x27)))) - (((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))));
+{ uint64_t x37 = ((((uint64_t)(x5 + x13) * (x21 + x29)) + ((uint64_t)(x7 + x15) * (x19 + x27))) - (((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)));
+{ uint64_t x38 = (((uint64_t)(x5 + x13) * (x19 + x27)) - ((uint64_t)x5 * x19));
+{ uint64_t x39 = (((((uint64_t)x11 * x25) + ((uint64_t)x16 * x30)) + x36) + x32);
+{ uint64_t x40 = ((((((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)) + (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))) + x37) + x33);
+{ uint64_t x41 = ((((((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21))) + (((uint64_t)x15 * x30) + (((uint64_t)x17 * x31) + ((uint64_t)x16 * x29)))) + x38) + x34);
+{ uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + (((uint64_t)x9 * x21) + ((uint64_t)x11 * x19)))) + (((uint64_t)x13 * x30) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x16 * x27)))));
+{ uint64_t x43 = (((((uint64_t)x5 * x23) + (((uint64_t)x7 * x21) + ((uint64_t)x9 * x19))) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((uint64_t)x17 * x27)))) + x32);
+{ uint64_t x44 = (((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27))) + x33);
+{ uint64_t x45 = ((((uint64_t)x5 * x19) + ((uint64_t)x13 * x27)) + x34);
+{ uint64_t x46 = (x42 >> 0x1b);
+{ uint32_t x47 = ((uint32_t)x42 & 0x7ffffff);
+{ uint64_t x48 = (x35 >> 0x1b);
+{ uint32_t x49 = ((uint32_t)x35 & 0x7ffffff);
+{ uint64_t x50 = ((0x8000000 * x48) + x49);
+{ uint64_t x51 = (x50 >> 0x1b);
+{ uint32_t x52 = ((uint32_t)x50 & 0x7ffffff);
+{ uint64_t x53 = ((x46 + x41) + x51);
+{ uint64_t x54 = (x53 >> 0x1b);
+{ uint32_t x55 = ((uint32_t)x53 & 0x7ffffff);
+{ uint64_t x56 = (x45 + x51);
+{ uint64_t x57 = (x56 >> 0x1b);
+{ uint32_t x58 = ((uint32_t)x56 & 0x7ffffff);
+{ uint64_t x59 = (x54 + x40);
+{ uint64_t x60 = (x59 >> 0x1b);
+{ uint32_t x61 = ((uint32_t)x59 & 0x7ffffff);
+{ uint64_t x62 = (x57 + x44);
+{ uint64_t x63 = (x62 >> 0x1b);
+{ uint32_t x64 = ((uint32_t)x62 & 0x7ffffff);
+{ uint64_t x65 = (x60 + x39);
+{ uint64_t x66 = (x65 >> 0x1b);
+{ uint32_t x67 = ((uint32_t)x65 & 0x7ffffff);
+{ uint64_t x68 = (x63 + x43);
+{ uint64_t x69 = (x68 >> 0x1b);
+{ uint32_t x70 = ((uint32_t)x68 & 0x7ffffff);
+{ uint64_t x71 = (x66 + x52);
+{ uint32_t x72 = (uint32_t) (x71 >> 0x1b);
+{ uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
+{ uint64_t x74 = (x69 + x47);
+{ uint32_t x75 = (uint32_t) (x74 >> 0x1b);
+{ uint32_t x76 = ((uint32_t)x74 & 0x7ffffff);
+{ uint64_t x77 = (((uint64_t)0x8000000 * x72) + x73);
+{ uint32_t x78 = (uint32_t) (x77 >> 0x1b);
+{ uint32_t x79 = ((uint32_t)x77 & 0x7ffffff);
+{ uint32_t x80 = ((x75 + x55) + x78);
+{ uint32_t x81 = (x80 >> 0x1b);
+{ uint32_t x82 = (x80 & 0x7ffffff);
+{ uint32_t x83 = (x58 + x78);
+{ uint32_t x84 = (x83 >> 0x1b);
+{ uint32_t x85 = (x83 & 0x7ffffff);
+out[0] = x79;
+out[1] = x67;
+out[2] = x81 + x61;
+out[3] = x82;
+out[4] = x76;
+out[5] = x70;
+out[6] = x84 + x64;
+out[7] = x85;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e216m2e108m1/femul.h b/src/Specific/solinas32_2e216m2e108m1/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas32_2e216m2e108m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/solinas32_2e216m2e108m1/fesquare.c b/src/Specific/solinas32_2e216m2e108m1/fesquare.c
new file mode 100644
index 000000000..88056d14c
--- /dev/null
+++ b/src/Specific/solinas32_2e216m2e108m1/fesquare.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x15 = (((uint64_t)(x8 + x13) * (x8 + x13)) - ((uint64_t)x8 * x8));
+{ uint64_t x16 = ((((uint64_t)(x6 + x14) * (x8 + x13)) + ((uint64_t)(x8 + x13) * (x6 + x14))) - (((uint64_t)x6 * x8) + ((uint64_t)x8 * x6)));
+{ uint64_t x17 = ((((uint64_t)(x4 + x12) * (x8 + x13)) + (((uint64_t)(x6 + x14) * (x6 + x14)) + ((uint64_t)(x8 + x13) * (x4 + x12)))) - (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + ((uint64_t)x8 * x4))));
+{ uint64_t x18 = ((((uint64_t)(x2 + x10) * (x8 + x13)) + (((uint64_t)(x4 + x12) * (x6 + x14)) + (((uint64_t)(x6 + x14) * (x4 + x12)) + ((uint64_t)(x8 + x13) * (x2 + x10))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+{ uint64_t x19 = ((((uint64_t)(x2 + x10) * (x6 + x14)) + (((uint64_t)(x4 + x12) * (x4 + x12)) + ((uint64_t)(x6 + x14) * (x2 + x10)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+{ uint64_t x20 = ((((uint64_t)(x2 + x10) * (x4 + x12)) + ((uint64_t)(x4 + x12) * (x2 + x10))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+{ uint64_t x21 = (((uint64_t)(x2 + x10) * (x2 + x10)) - ((uint64_t)x2 * x2));
+{ uint64_t x22 = (((((uint64_t)x8 * x8) + ((uint64_t)x13 * x13)) + x19) + x15);
+{ uint64_t x23 = ((((((uint64_t)x6 * x8) + ((uint64_t)x8 * x6)) + (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))) + x20) + x16);
+{ uint64_t x24 = ((((((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + ((uint64_t)x8 * x4))) + (((uint64_t)x12 * x13) + (((uint64_t)x14 * x14) + ((uint64_t)x13 * x12)))) + x21) + x17);
+{ uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x10 * x13) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x13 * x10)))));
+{ uint64_t x26 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10)))) + x15);
+{ uint64_t x27 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x10 * x12) + ((uint64_t)x12 * x10))) + x16);
+{ uint64_t x28 = ((((uint64_t)x2 * x2) + ((uint64_t)x10 * x10)) + x17);
+{ uint64_t x29 = (x25 >> 0x1b);
+{ uint32_t x30 = ((uint32_t)x25 & 0x7ffffff);
+{ uint64_t x31 = (x18 >> 0x1b);
+{ uint32_t x32 = ((uint32_t)x18 & 0x7ffffff);
+{ uint64_t x33 = ((0x8000000 * x31) + x32);
+{ uint64_t x34 = (x33 >> 0x1b);
+{ uint32_t x35 = ((uint32_t)x33 & 0x7ffffff);
+{ uint64_t x36 = ((x29 + x24) + x34);
+{ uint64_t x37 = (x36 >> 0x1b);
+{ uint32_t x38 = ((uint32_t)x36 & 0x7ffffff);
+{ uint64_t x39 = (x28 + x34);
+{ uint64_t x40 = (x39 >> 0x1b);
+{ uint32_t x41 = ((uint32_t)x39 & 0x7ffffff);
+{ uint64_t x42 = (x37 + x23);
+{ uint64_t x43 = (x42 >> 0x1b);
+{ uint32_t x44 = ((uint32_t)x42 & 0x7ffffff);
+{ uint64_t x45 = (x40 + x27);
+{ uint64_t x46 = (x45 >> 0x1b);
+{ uint32_t x47 = ((uint32_t)x45 & 0x7ffffff);
+{ uint64_t x48 = (x43 + x22);
+{ uint64_t x49 = (x48 >> 0x1b);
+{ uint32_t x50 = ((uint32_t)x48 & 0x7ffffff);
+{ uint64_t x51 = (x46 + x26);
+{ uint64_t x52 = (x51 >> 0x1b);
+{ uint32_t x53 = ((uint32_t)x51 & 0x7ffffff);
+{ uint64_t x54 = (x49 + x35);
+{ uint32_t x55 = (uint32_t) (x54 >> 0x1b);
+{ uint32_t x56 = ((uint32_t)x54 & 0x7ffffff);
+{ uint64_t x57 = (x52 + x30);
+{ uint32_t x58 = (uint32_t) (x57 >> 0x1b);
+{ uint32_t x59 = ((uint32_t)x57 & 0x7ffffff);
+{ uint64_t x60 = (((uint64_t)0x8000000 * x55) + x56);
+{ uint32_t x61 = (uint32_t) (x60 >> 0x1b);
+{ uint32_t x62 = ((uint32_t)x60 & 0x7ffffff);
+{ uint32_t x63 = ((x58 + x38) + x61);
+{ uint32_t x64 = (x63 >> 0x1b);
+{ uint32_t x65 = (x63 & 0x7ffffff);
+{ uint32_t x66 = (x41 + x61);
+{ uint32_t x67 = (x66 >> 0x1b);
+{ uint32_t x68 = (x66 & 0x7ffffff);
+out[0] = x62;
+out[1] = x50;
+out[2] = x64 + x44;
+out[3] = x65;
+out[4] = x59;
+out[5] = x53;
+out[6] = x67 + x47;
+out[7] = x68;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e216m2e108m1/fesquare.h b/src/Specific/solinas32_2e216m2e108m1/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas32_2e216m2e108m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Declares fesquare: an output pointer followed by eight uint64_t arguments. The required length of out is not stated in this header; presumably 8 entries, going by the "caller: uint64_t out[8]" note that closes the .c file just before this header in the diff - TODO confirm.
diff --git a/src/Specific/solinas32_2e216m2e108m1/freeze.c b/src/Specific/solinas32_2e216m2e108m1/freeze.c
new file mode 100644
index 000000000..f29663ddb
--- /dev/null
+++ b/src/Specific/solinas32_2e216m2e108m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): the lines that follow are not compilable C: the function body has no opening brace, and out[0]/out[1] are assigned declarations instead of values.
+out[0] = uint32_t x16;
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;  // "Op Syntax.SubWithGetBorrow ..." is not C; presumably an operation the code printer emitted verbatim instead of translating - TODO confirm and regenerate rather than hand-edit.
+out[2] = x2;
+out[3] = 0x7ffffff;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e216m2e108m1/freeze.h b/src/Specific/solinas32_2e216m2e108m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas32_2e216m2e108m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Declares freeze: an output pointer followed by eight uint64_t arguments. NOTE(review): the freeze.c added for this directory in the same diff is not valid C, so this prototype has no usable definition there.
diff --git a/src/Specific/solinas32_2e221m3/femul.c b/src/Specific/solinas32_2e221m3/femul.c
new file mode 100644
index 000000000..53e74bcf0
--- /dev/null
+++ b/src/Specific/solinas32_2e221m3/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)  // Multiplies two 10-limb values and writes the carried 10-limb result to out[0..9]. x20,x21,x19,...,x5 are one operand and x38,x39,x37,...,x23 the other, each listed most-significant limb first (x5 and x23 are limb 0). Presumably arithmetic modulo 2^221 - 3, judging by the directory name and the 0x3 wrap factor - TODO confirm.
+{ uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));  // x40..x49 are the product coefficients for limbs 9 down to 0. The 0x2 factors presumably compensate for the mixed limb widths (23 bits for limb 0, 22 bits for the rest, per the shifts below) - TODO confirm.
+{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x3 * ((uint64_t)x20 * x38)));  // from here on, products that overflow past limb 9 are folded back in with the factor 0x3
+{ uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x3 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+{ uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x3 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+{ uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x3 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x3 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
+{ uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x3 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x3 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x3 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x3 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));  // limb-0 coefficient: x5*x23 plus 0x3 times the doubled wrap-around products
+{ uint32_t x50 = (uint32_t) (x49 >> 0x17);  // carry chain starts at limb 0: everything above the low 23 bits is carried up. The uint32_t narrowing assumes bounded inputs; the bounds are not stated here - TODO confirm.
+{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);  // low 23 bits of the limb-0 coefficient
+{ uint64_t x52 = (x50 + x48);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x16);  // limbs 1 through 9 are split at 22 bits
+{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+{ uint64_t x55 = (x53 + x47);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x16);
+{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+{ uint64_t x58 = (x56 + x46);
+{ uint32_t x59 = (uint32_t) (x58 >> 0x16);
+{ uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
+{ uint64_t x61 = (x59 + x45);
+{ uint32_t x62 = (uint32_t) (x61 >> 0x16);
+{ uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
+{ uint64_t x64 = (x62 + x44);
+{ uint32_t x65 = (uint32_t) (x64 >> 0x16);
+{ uint32_t x66 = ((uint32_t)x64 & 0x3fffff);
+{ uint64_t x67 = (x65 + x43);
+{ uint32_t x68 = (uint32_t) (x67 >> 0x16);
+{ uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
+{ uint64_t x70 = (x68 + x42);
+{ uint32_t x71 = (uint32_t) (x70 >> 0x16);
+{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+{ uint64_t x73 = (x71 + x41);
+{ uint32_t x74 = (uint32_t) (x73 >> 0x16);
+{ uint32_t x75 = ((uint32_t)x73 & 0x3fffff);
+{ uint64_t x76 = (x74 + x40);
+{ uint32_t x77 = (uint32_t) (x76 >> 0x16);  // carry out of the top limb
+{ uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
+{ uint32_t x79 = (x51 + (0x3 * x77));  // the top carry wraps around into limb 0, scaled by 0x3
+{ uint32_t x80 = (x79 >> 0x17);
+{ uint32_t x81 = (x79 & 0x7fffff);
+{ uint32_t x82 = (x80 + x54);  // second, short carry pass: limb 0 into limb 1
+{ uint32_t x83 = (x82 >> 0x16);
+{ uint32_t x84 = (x82 & 0x3fffff);
+out[0] = x78;  // most significant limb first, matching the argument order
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57;  // limb 2 absorbs the last carry and is not masked again
+out[8] = x84;
+out[9] = x81;  // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e221m3/femul.h b/src/Specific/solinas32_2e221m3/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas32_2e221m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23);  // Declares femul. In the femul.c of this directory, x20..x5 and x38..x23 are the two 10-limb operands and out[0]..out[9] are written, so out must hold at least ten uint64_t entries.
diff --git a/src/Specific/solinas32_2e221m3/fesquare.c b/src/Specific/solinas32_2e221m3/fesquare.c
new file mode 100644
index 000000000..8c7950578
--- /dev/null
+++ b/src/Specific/solinas32_2e221m3/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // Squares a 10-limb value and writes the carried 10-limb result to out[0..9]. x17,x18,x16,...,x2 are the limbs, most-significant first (x2 is limb 0). Presumably arithmetic modulo 2^221 - 3, judging by the directory name and the 0x3 wrap factor - TODO confirm.
+{ uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2))))))))));  // x19..x28 are the coefficients for limbs 9 down to 0. Symmetric products are written out twice (e.g. x4*x18 and x18*x4), not merged.
+{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * ((uint64_t)x17 * x17)));  // from here on, products that overflow past limb 9 are folded back in with the factor 0x3
+{ uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+{ uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x3 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+{ uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
+{ uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x3 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));  // limb-0 coefficient: x2*x2 plus 0x3 times the doubled wrap-around products
+{ uint32_t x29 = (uint32_t) (x28 >> 0x17);  // carry chain starts at limb 0: everything above the low 23 bits is carried up. The uint32_t narrowing assumes bounded inputs; the bounds are not stated here - TODO confirm.
+{ uint32_t x30 = ((uint32_t)x28 & 0x7fffff);  // low 23 bits of the limb-0 coefficient
+{ uint64_t x31 = (x29 + x27);
+{ uint32_t x32 = (uint32_t) (x31 >> 0x16);  // limbs 1 through 9 are split at 22 bits
+{ uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
+{ uint64_t x34 = (x32 + x26);
+{ uint32_t x35 = (uint32_t) (x34 >> 0x16);
+{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+{ uint64_t x37 = (x35 + x25);
+{ uint32_t x38 = (uint32_t) (x37 >> 0x16);
+{ uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
+{ uint64_t x40 = (x38 + x24);
+{ uint32_t x41 = (uint32_t) (x40 >> 0x16);
+{ uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
+{ uint64_t x43 = (x41 + x23);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x16);
+{ uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
+{ uint64_t x46 = (x44 + x22);
+{ uint32_t x47 = (uint32_t) (x46 >> 0x16);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+{ uint64_t x49 = (x47 + x21);
+{ uint32_t x50 = (uint32_t) (x49 >> 0x16);
+{ uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
+{ uint64_t x52 = (x50 + x20);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x16);
+{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+{ uint64_t x55 = (x53 + x19);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x16);  // carry out of the top limb
+{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+{ uint32_t x58 = (x30 + (0x3 * x56));  // the top carry wraps around into limb 0, scaled by 0x3
+{ uint32_t x59 = (x58 >> 0x17);
+{ uint32_t x60 = (x58 & 0x7fffff);
+{ uint32_t x61 = (x59 + x33);  // second, short carry pass: limb 0 into limb 1
+{ uint32_t x62 = (x61 >> 0x16);
+{ uint32_t x63 = (x61 & 0x3fffff);
+out[0] = x57;  // most significant limb first, matching the argument order
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36;  // limb 2 absorbs the last carry and is not masked again
+out[8] = x63;
+out[9] = x60;  // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e221m3/fesquare.h b/src/Specific/solinas32_2e221m3/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas32_2e221m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Declares fesquare. In the fesquare.c of this directory, x17..x2 are the ten limbs of the value to square (x2 is limb 0) and out[0]..out[9] are written, so out must hold at least ten uint64_t entries.
diff --git a/src/Specific/solinas32_2e221m3/freeze.c b/src/Specific/solinas32_2e221m3/freeze.c
new file mode 100644
index 000000000..88476affb
--- /dev/null
+++ b/src/Specific/solinas32_2e221m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): the lines that follow are not compilable C: the function body has no opening brace, and out[0]/out[1] are assigned declarations instead of values.
+out[0] = uint32_t x20;
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;  // "Op Syntax.SubWithGetBorrow ..." is not C; presumably an operation the code printer emitted verbatim instead of translating - TODO confirm and regenerate rather than hand-edit.
+out[2] = x2;
+out[3] = 0x7ffffd;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e221m3/freeze.h b/src/Specific/solinas32_2e221m3/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas32_2e221m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Declares freeze: an output pointer followed by ten uint64_t arguments. NOTE(review): the freeze.c added for this directory in the same diff is not valid C, so this prototype has no usable definition there.
diff --git a/src/Specific/solinas32_2e222m117/femul.c b/src/Specific/solinas32_2e222m117/femul.c
new file mode 100644
index 000000000..c7fac756f
--- /dev/null
+++ b/src/Specific/solinas32_2e222m117/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)  // Multiplies two 10-limb values and writes the carried 10-limb result to out[0..9]. x20,x21,x19,...,x5 are one operand and x38,x39,x37,...,x23 the other, each listed most-significant limb first (x5 and x23 are limb 0). Presumably arithmetic modulo 2^222 - 117 (0x75 = 117), judging by the directory name and the wrap factor - TODO confirm.
+{ uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23))))))))));  // x40..x49 are the product coefficients for limbs 9 down to 0. The 0x2 factors presumably compensate for the mixed limb widths (limbs 0 and 5 are split at 23 bits, the others at 22, per the shifts below) - TODO confirm.
+{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x75 * ((uint64_t)x20 * x38)));  // from here on, products that overflow past limb 9 are folded back in with the factor 0x75
+{ uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x75 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+{ uint64_t x43 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x17 * x23))))))) + (0x75 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+{ uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x75 * ((0x2 * ((uint64_t)x17 * x38)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + (0x2 * ((uint64_t)x20 * x35)))))));
+{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x75 * (((uint64_t)x15 * x38) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((uint64_t)x20 * x33)))))));
+{ uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x75 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + ((0x2 * ((uint64_t)x17 * x37)) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x75 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((0x2 * ((uint64_t)x17 * x35)) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x75 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x75 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25))))))))))));  // limb-0 coefficient: x5*x23 plus 0x75 times the wrap-around products
+{ uint64_t x50 = (x49 >> 0x17);  // carry chain starts at limb 0: everything above the low 23 bits is carried up (carries are kept in 64 bits until the top limb)
+{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);  // low 23 bits of the limb-0 coefficient
+{ uint64_t x52 = (x50 + x48);
+{ uint64_t x53 = (x52 >> 0x16);  // limbs 1 through 4 are split at 22 bits
+{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+{ uint64_t x55 = (x53 + x47);
+{ uint64_t x56 = (x55 >> 0x16);
+{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+{ uint64_t x58 = (x56 + x46);
+{ uint64_t x59 = (x58 >> 0x16);
+{ uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
+{ uint64_t x61 = (x59 + x45);
+{ uint64_t x62 = (x61 >> 0x16);
+{ uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
+{ uint64_t x64 = (x62 + x44);
+{ uint64_t x65 = (x64 >> 0x17);  // limb 5 is split at 23 bits, unlike its neighbours
+{ uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
+{ uint64_t x67 = (x65 + x43);
+{ uint64_t x68 = (x67 >> 0x16);  // limbs 6 through 9 are split at 22 bits again
+{ uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
+{ uint64_t x70 = (x68 + x42);
+{ uint64_t x71 = (x70 >> 0x16);
+{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+{ uint64_t x73 = (x71 + x41);
+{ uint64_t x74 = (x73 >> 0x16);
+{ uint32_t x75 = ((uint32_t)x73 & 0x3fffff);
+{ uint64_t x76 = (x74 + x40);
+{ uint32_t x77 = (uint32_t) (x76 >> 0x16);  // carry out of the top limb; the uint32_t narrowing assumes bounded inputs, and the bounds are not stated here - TODO confirm
+{ uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
+{ uint64_t x79 = (x51 + ((uint64_t)0x75 * x77));  // the top carry wraps around into limb 0, scaled by 0x75; computed in 64 bits
+{ uint32_t x80 = (uint32_t) (x79 >> 0x17);
+{ uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
+{ uint32_t x82 = (x80 + x54);  // second, short carry pass: limb 0 into limb 1
+{ uint32_t x83 = (x82 >> 0x16);
+{ uint32_t x84 = (x82 & 0x3fffff);
+out[0] = x78;  // most significant limb first, matching the argument order
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57;  // limb 2 absorbs the last carry and is not masked again
+out[8] = x84;
+out[9] = x81;  // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e222m117/femul.h b/src/Specific/solinas32_2e222m117/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas32_2e222m117/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23);  // Declares femul. In the femul.c of this directory, x20..x5 and x38..x23 are the two 10-limb operands and out[0]..out[9] are written, so out must hold at least ten uint64_t entries.
diff --git a/src/Specific/solinas32_2e222m117/fesquare.c b/src/Specific/solinas32_2e222m117/fesquare.c
new file mode 100644
index 000000000..c6de6e097
--- /dev/null
+++ b/src/Specific/solinas32_2e222m117/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // Squares a 10-limb value and writes the carried 10-limb result to out[0..9]. x17,x18,x16,...,x2 are the limbs, most-significant first (x2 is limb 0). Presumably arithmetic modulo 2^222 - 117 (0x75 = 117), judging by the directory name and the wrap factor - TODO confirm.
+{ uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2))))))))));  // x19..x28 are the coefficients for limbs 9 down to 0. Symmetric products are written out twice (e.g. x10*x12 and x12*x10), not merged.
+{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x75 * ((uint64_t)x17 * x17)));  // from here on, products that overflow past limb 9 are folded back in with the factor 0x75
+{ uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x75 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+{ uint64_t x22 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x75 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+{ uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x75 * ((0x2 * ((uint64_t)x14 * x17)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (0x2 * ((uint64_t)x17 * x14)))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x75 * (((uint64_t)x12 * x17) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((uint64_t)x17 * x12)))))));
+{ uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x75 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x75 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + ((0x2 * ((uint64_t)x14 * x14)) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x75 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x75 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + (((uint64_t)x12 * x12) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4))))))))))));  // limb-0 coefficient: x2*x2 plus 0x75 times the wrap-around products
+{ uint64_t x29 = (x28 >> 0x17);  // carry chain starts at limb 0: everything above the low 23 bits is carried up (carries are kept in 64 bits until the top limb)
+{ uint32_t x30 = ((uint32_t)x28 & 0x7fffff);  // low 23 bits of the limb-0 coefficient
+{ uint64_t x31 = (x29 + x27);
+{ uint64_t x32 = (x31 >> 0x16);  // limbs 1 through 4 are split at 22 bits
+{ uint32_t x33 = ((uint32_t)x31 & 0x3fffff);
+{ uint64_t x34 = (x32 + x26);
+{ uint64_t x35 = (x34 >> 0x16);
+{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+{ uint64_t x37 = (x35 + x25);
+{ uint64_t x38 = (x37 >> 0x16);
+{ uint32_t x39 = ((uint32_t)x37 & 0x3fffff);
+{ uint64_t x40 = (x38 + x24);
+{ uint64_t x41 = (x40 >> 0x16);
+{ uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
+{ uint64_t x43 = (x41 + x23);
+{ uint64_t x44 = (x43 >> 0x17);  // limb 5 is split at 23 bits, unlike its neighbours
+{ uint32_t x45 = ((uint32_t)x43 & 0x7fffff);
+{ uint64_t x46 = (x44 + x22);
+{ uint64_t x47 = (x46 >> 0x16);  // limbs 6 through 9 are split at 22 bits again
+{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+{ uint64_t x49 = (x47 + x21);
+{ uint64_t x50 = (x49 >> 0x16);
+{ uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
+{ uint64_t x52 = (x50 + x20);
+{ uint64_t x53 = (x52 >> 0x16);
+{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+{ uint64_t x55 = (x53 + x19);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x16);  // carry out of the top limb; the uint32_t narrowing assumes bounded inputs, and the bounds are not stated here - TODO confirm
+{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+{ uint64_t x58 = (x30 + ((uint64_t)0x75 * x56));  // the top carry wraps around into limb 0, scaled by 0x75; computed in 64 bits
+{ uint32_t x59 = (uint32_t) (x58 >> 0x17);
+{ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+{ uint32_t x61 = (x59 + x33);  // second, short carry pass: limb 0 into limb 1
+{ uint32_t x62 = (x61 >> 0x16);
+{ uint32_t x63 = (x61 & 0x3fffff);
+out[0] = x57;  // most significant limb first, matching the argument order
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36;  // limb 2 absorbs the last carry and is not masked again
+out[8] = x63;
+out[9] = x60;  // least significant limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e222m117/fesquare.h b/src/Specific/solinas32_2e222m117/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas32_2e222m117/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Declares fesquare. In the fesquare.c of this directory, x17..x2 are the ten limbs of the value to square (x2 is limb 0) and out[0]..out[9] are written, so out must hold at least ten uint64_t entries.
diff --git a/src/Specific/solinas32_2e222m117/freeze.c b/src/Specific/solinas32_2e222m117/freeze.c
new file mode 100644
index 000000000..abd6941de
--- /dev/null
+++ b/src/Specific/solinas32_2e222m117/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): the lines that follow are not compilable C: the function body has no opening brace, and out[0]/out[1] are assigned declarations instead of values.
+out[0] = uint32_t x20;
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;  // "Op Syntax.SubWithGetBorrow ..." is not C; presumably an operation the code printer emitted verbatim instead of translating - TODO confirm and regenerate rather than hand-edit.
+out[2] = x2;
+out[3] = 0x7fff8b;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e222m117/freeze.h b/src/Specific/solinas32_2e222m117/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas32_2e222m117/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Declares freeze: an output pointer followed by ten uint64_t arguments. NOTE(review): the freeze.c added for this directory in the same diff is not valid C, so this prototype has no usable definition there.
diff --git a/src/Specific/solinas32_2e224m2e96p1/freeze.c b/src/Specific/solinas32_2e224m2e96p1/freeze.c
new file mode 100644
index 000000000..b2ffc8354
--- /dev/null
+++ b/src/Specific/solinas32_2e224m2e96p1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): the lines that follow are not compilable C: the function body has no opening brace, and out[0]/out[1] are assigned declarations instead of values.
+out[0] = uint32_t x16;
+out[1] = ℤ x17 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 3 Syntax.TWord 5 Syntax.TZ 0x0;  // neither the type name before x17 nor "Op Syntax.SubWithGetBorrow ..." is C; presumably the code printer emitted an untranslated operation and an unbounded integer type verbatim - TODO confirm and regenerate rather than hand-edit.
+out[2] = x2;
+out[3] = 0x1;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e224m2e96p1/freeze.h b/src/Specific/solinas32_2e224m2e96p1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas32_2e224m2e96p1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Declares freeze: an output pointer followed by eight uint64_t arguments. NOTE(review): the freeze.c added for this directory in the same diff is not valid C, so this prototype has no usable definition there.
diff --git a/src/Specific/solinas32_2e226m5/femul.c b/src/Specific/solinas32_2e226m5/femul.c
new file mode 100644
index 000000000..ffc389e14
--- /dev/null
+++ b/src/Specific/solinas32_2e226m5/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Multiplies two 8-limb values passed limb by limb (x16..x5 and x30..x19) and stores eight result words in out[0..7]. x5 and x19 feed the first accumulator to be carried (x39); x16 and x30 sit at the opposite end. Cross terms that wrap are scaled by 0x5 and limbs are carried with 0x1d/0x1c-bit shifts; presumably arithmetic mod 2^226 - 5 per the directory name (TODO confirm).
+{ uint64_t x32 = (((uint64_t)x5 * x30) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((0x2 * ((uint64_t)x15 * x23)) + ((0x2 * ((uint64_t)x17 * x21)) + ((uint64_t)x16 * x19))))))));
+{ uint64_t x33 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((0x2 * ((uint64_t)x15 * x21)) + ((uint64_t)x17 * x19))))))) + (0x5 * ((uint64_t)x16 * x30)));
+{ uint64_t x34 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + (((uint64_t)x13 * x21) + ((uint64_t)x15 * x19)))))) + (0x5 * (((uint64_t)x17 * x30) + ((uint64_t)x16 * x31))));
+{ uint128_t x35 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((0x2 * ((uint64_t)x11 * x21)) + ((uint64_t)x13 * x19))))) + ((uint128_t)0x5 * ((0x2 * ((uint64_t)x15 * x30)) + ((0x2 * ((uint64_t)x17 * x31)) + (0x2 * ((uint64_t)x16 * x29)))))); // x35..x39 were emitted with the non-C type `ℤ` (unbounded integer); they are held in uint128_t (typedef'd earlier in this file), which cannot overflow here because each operand of the widened + and * is at most 64 bits
+{ uint128_t x36 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((0x2 * ((uint64_t)x9 * x21)) + ((uint64_t)x11 * x19)))) + ((uint128_t)0x5 * (((uint64_t)x13 * x30) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x16 * x27))))));
+{ uint128_t x37 = ((((uint64_t)x5 * x23) + ((0x2 * ((uint64_t)x7 * x21)) + ((uint64_t)x9 * x19))) + ((uint128_t)0x5 * (((uint64_t)x11 * x30) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((uint64_t)x16 * x25)))))));
+{ uint128_t x38 = ((((uint64_t)x5 * x21) + ((uint64_t)x7 * x19)) + ((uint128_t)0x5 * (((uint64_t)x9 * x30) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x16 * x23))))))));
+{ uint128_t x39 = (((uint64_t)x5 * x19) + ((uint128_t)0x5 * ((0x2 * ((uint64_t)x7 * x30)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + (0x2 * ((uint64_t)x16 * x21))))))))));
+{ uint64_t x40 = (uint64_t) (x39 >> 0x1d); // carry chain starts at x39: bits above the low 0x1d move to the next accumulator; the narrowing to the generator's declared uint64_t is now explicit
+{ uint32_t x41 = ((uint32_t)x39 & 0x1fffffff);
+{ uint128_t x42 = (x40 + x38); // sum is 128-bit because x38 is
+{ uint64_t x43 = (uint64_t) (x42 >> 0x1c);
+{ uint32_t x44 = ((uint32_t)x42 & 0xfffffff);
+{ uint128_t x45 = (x43 + x37);
+{ uint64_t x46 = (uint64_t) (x45 >> 0x1c);
+{ uint32_t x47 = ((uint32_t)x45 & 0xfffffff);
+{ uint128_t x48 = (x46 + x36);
+{ uint64_t x49 = (uint64_t) (x48 >> 0x1c);
+{ uint32_t x50 = ((uint32_t)x48 & 0xfffffff);
+{ uint128_t x51 = (x49 + x35);
+{ uint64_t x52 = (uint64_t) (x51 >> 0x1d);
+{ uint32_t x53 = ((uint32_t)x51 & 0x1fffffff);
+{ uint64_t x54 = (x52 + x34); // from here on the generator's own 64-bit types are kept unchanged
+{ uint64_t x55 = (x54 >> 0x1c);
+{ uint32_t x56 = ((uint32_t)x54 & 0xfffffff);
+{ uint64_t x57 = (x55 + x33);
+{ uint64_t x58 = (x57 >> 0x1c);
+{ uint32_t x59 = ((uint32_t)x57 & 0xfffffff);
+{ uint64_t x60 = (x58 + x32);
+{ uint64_t x61 = (x60 >> 0x1c);
+{ uint32_t x62 = ((uint32_t)x60 & 0xfffffff);
+{ uint64_t x63 = (x41 + (0x5 * x61)); // carry out of the last accumulator re-enters the first limb scaled by 0x5
+{ uint32_t x64 = (uint32_t) (x63 >> 0x1d);
+{ uint32_t x65 = ((uint32_t)x63 & 0x1fffffff);
+{ uint32_t x66 = (x64 + x44);
+{ uint32_t x67 = (x66 >> 0x1c);
+{ uint32_t x68 = (x66 & 0xfffffff);
+out[0] = x62; // out[0] comes from the last accumulator carried (x32), out[7] from the first (x39)
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // final small carry is added without a further mask
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e226m5/femul.h b/src/Specific/solinas32_2e226m5/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas32_2e226m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype for femul: an output pointer followed by two groups of eight uint64_t limbs (x16..x5, then x30..x19); the definition in femul.c stores out[0..7].
diff --git a/src/Specific/solinas32_2e226m5/fesquare.c b/src/Specific/solinas32_2e226m5/fesquare.c
new file mode 100644
index 000000000..ca7577562
--- /dev/null
+++ b/src/Specific/solinas32_2e226m5/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares an 8-limb value passed limb by limb (x13..x2) and stores eight result words in out[0..7]. x2 feeds the first accumulator to be carried (x22); x13 sits at the opposite end. Cross terms that wrap are scaled by 0x5 and limbs are carried with 0x1d/0x1c-bit shifts; presumably arithmetic mod 2^226 - 5 per the directory name (TODO confirm).
+{ uint64_t x15 = (((uint64_t)x2 * x13) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x13 * x2))))))));
+{ uint64_t x16 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x5 * ((uint64_t)x13 * x13)));
+{ uint64_t x17 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x5 * (((uint64_t)x14 * x13) + ((uint64_t)x13 * x14))));
+{ uint128_t x18 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + ((uint128_t)0x5 * ((0x2 * ((uint64_t)x12 * x13)) + ((0x2 * ((uint64_t)x14 * x14)) + (0x2 * ((uint64_t)x13 * x12)))))); // x18..x22 were emitted with the non-C type `ℤ` (unbounded integer); they are held in uint128_t (typedef'd earlier in this file), which cannot overflow here because each operand of the widened + and * is at most 64 bits
+{ uint128_t x19 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + ((uint128_t)0x5 * (((uint64_t)x10 * x13) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((uint64_t)x13 * x10))))));
+{ uint128_t x20 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + ((uint128_t)0x5 * (((uint64_t)x8 * x13) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((uint64_t)x13 * x8)))))));
+{ uint128_t x21 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + ((uint128_t)0x5 * (((uint64_t)x6 * x13) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x13 * x6))))))));
+{ uint128_t x22 = (((uint64_t)x2 * x2) + ((uint128_t)0x5 * ((0x2 * ((uint64_t)x4 * x13)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + (0x2 * ((uint64_t)x13 * x4))))))))));
+{ uint64_t x23 = (uint64_t) (x22 >> 0x1d); // carry chain starts at x22: bits above the low 0x1d move to the next accumulator; the narrowing to the generator's declared uint64_t is now explicit
+{ uint32_t x24 = ((uint32_t)x22 & 0x1fffffff);
+{ uint128_t x25 = (x23 + x21); // sum is 128-bit because x21 is
+{ uint64_t x26 = (uint64_t) (x25 >> 0x1c);
+{ uint32_t x27 = ((uint32_t)x25 & 0xfffffff);
+{ uint128_t x28 = (x26 + x20);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x1c);
+{ uint32_t x30 = ((uint32_t)x28 & 0xfffffff);
+{ uint128_t x31 = (x29 + x19);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x1c);
+{ uint32_t x33 = ((uint32_t)x31 & 0xfffffff);
+{ uint128_t x34 = (x32 + x18);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x1d);
+{ uint32_t x36 = ((uint32_t)x34 & 0x1fffffff);
+{ uint64_t x37 = (x35 + x17); // from here on the generator's own 64-bit types are kept unchanged
+{ uint64_t x38 = (x37 >> 0x1c);
+{ uint32_t x39 = ((uint32_t)x37 & 0xfffffff);
+{ uint64_t x40 = (x38 + x16);
+{ uint64_t x41 = (x40 >> 0x1c);
+{ uint32_t x42 = ((uint32_t)x40 & 0xfffffff);
+{ uint64_t x43 = (x41 + x15);
+{ uint64_t x44 = (x43 >> 0x1c);
+{ uint32_t x45 = ((uint32_t)x43 & 0xfffffff);
+{ uint64_t x46 = (x24 + (0x5 * x44)); // carry out of the last accumulator re-enters the first limb scaled by 0x5
+{ uint32_t x47 = (uint32_t) (x46 >> 0x1d);
+{ uint32_t x48 = ((uint32_t)x46 & 0x1fffffff);
+{ uint32_t x49 = (x47 + x27);
+{ uint32_t x50 = (x49 >> 0x1c);
+{ uint32_t x51 = (x49 & 0xfffffff);
+out[0] = x45; // out[0] comes from the last accumulator carried (x15), out[7] from the first (x22)
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // final small carry is added without a further mask
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas32_2e226m5/fesquare.h b/src/Specific/solinas32_2e226m5/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas32_2e226m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare: an output pointer followed by eight uint64_t limbs (x13..x2); the definition in fesquare.c stores out[0..7].
diff --git a/src/Specific/solinas32_2e226m5/freeze.c b/src/Specific/solinas32_2e226m5/freeze.c
new file mode 100644
index 000000000..5251bb672
--- /dev/null
+++ b/src/Specific/solinas32_2e226m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration used as a value, a raw generator term); it looks like an unreduced dump from the code generator and presumably needs regenerating -- confirm upstream.
+out[0] = uint32_t x16; // `uint32_t x16` is a declaration, not an expression; x16 is not defined anywhere in this block
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 29 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // unprinted generator term (`Op Syntax.SubWithGetBorrow ...`), not C
+out[2] = x2; // stores parameter x2 unchanged
+out[3] = 0x1ffffffb;; // constant store followed by a stray empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e226m5/freeze.h b/src/Specific/solinas32_2e226m5/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas32_2e226m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze: an output pointer followed by eight uint64_t values; same parameter list as the definition in this directory's freeze.c.
diff --git a/src/Specific/solinas32_2e230m27/femul.c b/src/Specific/solinas32_2e230m27/femul.c
new file mode 100644
index 000000000..ebefa4d02
--- /dev/null
+++ b/src/Specific/solinas32_2e230m27/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23) // Multiplies two 10-limb values passed limb by limb (x20..x5 and x38..x23) and stores ten result words in out[0..9]. x5 and x23 feed the first accumulator to be carried (x49); x20 and x38 sit at the opposite end. Cross terms that wrap are scaled by 0x1b and every limb is carried with a 0x17-bit shift and 0x7fffff mask; presumably arithmetic mod 2^230 - 27 per the directory name (TODO confirm).
+{ uint64_t x40 = (((uint64_t)x5 * x38) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + (((uint64_t)x21 * x25) + ((uint64_t)x20 * x23)))))))))); // x40..x49: one accumulator per output position, each a sum of limb products
+{ uint64_t x41 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x21 * x23))))))))) + (0x1b * ((uint64_t)x20 * x38)));
+{ uint64_t x42 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x19 * x23)))))))) + (0x1b * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+{ uint64_t x43 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + ((uint64_t)x17 * x23))))))) + (0x1b * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+{ uint64_t x44 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((uint64_t)x15 * x23)))))) + (0x1b * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+{ uint64_t x45 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((uint64_t)x13 * x23))))) + (0x1b * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
+{ uint64_t x46 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)))) + (0x1b * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+{ uint64_t x47 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + ((uint64_t)x9 * x23))) + (0x1b * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x1b * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x1b * (((uint64_t)x7 * x38) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + (((uint64_t)x21 * x27) + ((uint64_t)x20 * x25)))))))))));
+{ uint64_t x50 = (x49 >> 0x17); // carry chain starts at x49: bits above the low 0x17 move to the next accumulator
+{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+{ uint64_t x52 = (x50 + x48);
+{ uint64_t x53 = (x52 >> 0x17);
+{ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+{ uint64_t x55 = (x53 + x47);
+{ uint64_t x56 = (x55 >> 0x17);
+{ uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
+{ uint64_t x58 = (x56 + x46);
+{ uint64_t x59 = (x58 >> 0x17);
+{ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+{ uint64_t x61 = (x59 + x45);
+{ uint64_t x62 = (x61 >> 0x17);
+{ uint32_t x63 = ((uint32_t)x61 & 0x7fffff);
+{ uint64_t x64 = (x62 + x44);
+{ uint64_t x65 = (x64 >> 0x17);
+{ uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
+{ uint64_t x67 = (x65 + x43);
+{ uint64_t x68 = (x67 >> 0x17);
+{ uint32_t x69 = ((uint32_t)x67 & 0x7fffff);
+{ uint64_t x70 = (x68 + x42);
+{ uint64_t x71 = (x70 >> 0x17);
+{ uint32_t x72 = ((uint32_t)x70 & 0x7fffff);
+{ uint64_t x73 = (x71 + x41);
+{ uint32_t x74 = (uint32_t) (x73 >> 0x17);
+{ uint32_t x75 = ((uint32_t)x73 & 0x7fffff);
+{ uint64_t x76 = (x74 + x40);
+{ uint32_t x77 = (uint32_t) (x76 >> 0x17);
+{ uint32_t x78 = ((uint32_t)x76 & 0x7fffff);
+{ uint64_t x79 = (x51 + ((uint64_t)0x1b * x77)); // carry out of the last accumulator re-enters the first limb scaled by 0x1b
+{ uint32_t x80 = (uint32_t) (x79 >> 0x17);
+{ uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
+{ uint32_t x82 = (x80 + x54);
+{ uint32_t x83 = (x82 >> 0x17);
+{ uint32_t x84 = (x82 & 0x7fffff);
+out[0] = x78; // out[0] comes from the last accumulator carried (x40), out[9] from the first (x49)
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57; // final small carry is added without a further mask
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e230m27/femul.h b/src/Specific/solinas32_2e230m27/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas32_2e230m27/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23); // Prototype for femul: an output pointer followed by two groups of ten uint64_t limbs (x20..x5, then x38..x23); the definition in femul.c stores out[0..9].
diff --git a/src/Specific/solinas32_2e230m27/fesquare.c b/src/Specific/solinas32_2e230m27/fesquare.c
new file mode 100644
index 000000000..edf1517cb
--- /dev/null
+++ b/src/Specific/solinas32_2e230m27/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 10-limb value passed limb by limb (x17..x2) and stores ten result words in out[0..9]. x2 feeds the first accumulator to be carried (x28); x17 sits at the opposite end. Cross terms that wrap are scaled by 0x1b and every limb is carried with a 0x17-bit shift and 0x7fffff mask; presumably arithmetic mod 2^230 - 27 per the directory name (TODO confirm).
+{ uint64_t x19 = (((uint64_t)x2 * x17) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x17 * x2)))))))))); // x19..x28: one accumulator per output position, each a sum of limb products
+{ uint64_t x20 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x1b * ((uint64_t)x17 * x17)));
+{ uint64_t x21 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x1b * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+{ uint64_t x22 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x1b * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+{ uint64_t x23 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x1b * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x1b * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
+{ uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x1b * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+{ uint64_t x26 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x1b * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1b * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x1b * (((uint64_t)x4 * x17) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((uint64_t)x17 * x4)))))))))));
+{ uint64_t x29 = (x28 >> 0x17); // carry chain starts at x28: bits above the low 0x17 move to the next accumulator
+{ uint32_t x30 = ((uint32_t)x28 & 0x7fffff);
+{ uint64_t x31 = (x29 + x27);
+{ uint64_t x32 = (x31 >> 0x17);
+{ uint32_t x33 = ((uint32_t)x31 & 0x7fffff);
+{ uint64_t x34 = (x32 + x26);
+{ uint64_t x35 = (x34 >> 0x17);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7fffff);
+{ uint64_t x37 = (x35 + x25);
+{ uint64_t x38 = (x37 >> 0x17);
+{ uint32_t x39 = ((uint32_t)x37 & 0x7fffff);
+{ uint64_t x40 = (x38 + x24);
+{ uint64_t x41 = (x40 >> 0x17);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7fffff);
+{ uint64_t x43 = (x41 + x23);
+{ uint64_t x44 = (x43 >> 0x17);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7fffff);
+{ uint64_t x46 = (x44 + x22);
+{ uint64_t x47 = (x46 >> 0x17);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7fffff);
+{ uint64_t x49 = (x47 + x21);
+{ uint64_t x50 = (x49 >> 0x17);
+{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+{ uint64_t x52 = (x50 + x20);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x17);
+{ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+{ uint64_t x55 = (x53 + x19);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x17);
+{ uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
+{ uint64_t x58 = (x30 + ((uint64_t)0x1b * x56)); // carry out of the last accumulator re-enters the first limb scaled by 0x1b
+{ uint32_t x59 = (uint32_t) (x58 >> 0x17);
+{ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+{ uint32_t x61 = (x59 + x33);
+{ uint32_t x62 = (x61 >> 0x17);
+{ uint32_t x63 = (x61 & 0x7fffff);
+out[0] = x57; // out[0] comes from the last accumulator carried (x19), out[9] from the first (x28)
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36; // final small carry is added without a further mask
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e230m27/fesquare.h b/src/Specific/solinas32_2e230m27/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas32_2e230m27/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare: an output pointer followed by ten uint64_t limbs (x17..x2); the definition in fesquare.c stores out[0..9].
diff --git a/src/Specific/solinas32_2e230m27/freeze.c b/src/Specific/solinas32_2e230m27/freeze.c
new file mode 100644
index 000000000..36de528ff
--- /dev/null
+++ b/src/Specific/solinas32_2e230m27/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration used as a value, a raw generator term); it looks like an unreduced dump from the code generator and presumably needs regenerating -- confirm upstream.
+out[0] = uint32_t x20; // `uint32_t x20` is a declaration, not an expression; x20 is not defined anywhere in this block
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // unprinted generator term (`Op Syntax.SubWithGetBorrow ...`), not C
+out[2] = x2; // stores parameter x2 unchanged
+out[3] = 0x7fffe5;; // constant store followed by a stray empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e230m27/freeze.h b/src/Specific/solinas32_2e230m27/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas32_2e230m27/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze: an output pointer followed by ten uint64_t values; same parameter list as the definition in this directory's freeze.c.
diff --git a/src/Specific/solinas32_2e235m15/femul.c b/src/Specific/solinas32_2e235m15/femul.c
new file mode 100644
index 000000000..d65148e6d
--- /dev/null
+++ b/src/Specific/solinas32_2e235m15/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21) // Multiplies two 9-limb values passed limb by limb (x18..x5 and x34..x21) and stores nine result words in out[0..8]. x5 and x21 feed the first accumulator to be carried (x44); x18 and x34 sit at the opposite end. Cross terms that wrap are scaled by 0xf; the first limb is carried with a 0x1b-bit shift and the rest with 0x1a; presumably arithmetic mod 2^235 - 15 per the directory name (TODO confirm).
+{ uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21))))))))); // x36..x44: one accumulator per output position, each a sum of limb products (some doubled)
+{ uint64_t x37 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((0x2 * ((uint64_t)x17 * x23)) + ((uint64_t)x19 * x21)))))))) + (0xf * ((uint64_t)x18 * x34)));
+{ uint64_t x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) + (0xf * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
+{ uint64_t x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) + (0xf * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
+{ uint64_t x40 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((0x2 * ((uint64_t)x11 * x23)) + ((uint64_t)x13 * x21))))) + (0xf * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+{ uint64_t x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) + (0xf * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
+{ uint64_t x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) + (0xf * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
+{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0xf * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+{ uint64_t x44 = (((uint64_t)x5 * x21) + (0xf * ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))));
+{ uint64_t x45 = (x44 >> 0x1b); // carry chain starts at x44: bits above the low 0x1b move to the next accumulator
+{ uint32_t x46 = ((uint32_t)x44 & 0x7ffffff);
+{ uint64_t x47 = (x45 + x43);
+{ uint64_t x48 = (x47 >> 0x1a);
+{ uint32_t x49 = ((uint32_t)x47 & 0x3ffffff);
+{ uint64_t x50 = (x48 + x42);
+{ uint64_t x51 = (x50 >> 0x1a);
+{ uint32_t x52 = ((uint32_t)x50 & 0x3ffffff);
+{ uint64_t x53 = (x51 + x41);
+{ uint64_t x54 = (x53 >> 0x1a);
+{ uint32_t x55 = ((uint32_t)x53 & 0x3ffffff);
+{ uint64_t x56 = (x54 + x40);
+{ uint64_t x57 = (x56 >> 0x1a);
+{ uint32_t x58 = ((uint32_t)x56 & 0x3ffffff);
+{ uint64_t x59 = (x57 + x39);
+{ uint64_t x60 = (x59 >> 0x1a);
+{ uint32_t x61 = ((uint32_t)x59 & 0x3ffffff);
+{ uint64_t x62 = (x60 + x38);
+{ uint64_t x63 = (x62 >> 0x1a);
+{ uint32_t x64 = ((uint32_t)x62 & 0x3ffffff);
+{ uint64_t x65 = (x63 + x37);
+{ uint64_t x66 = (x65 >> 0x1a);
+{ uint32_t x67 = ((uint32_t)x65 & 0x3ffffff);
+{ uint64_t x68 = (x66 + x36);
+{ uint64_t x69 = (x68 >> 0x1a);
+{ uint32_t x70 = ((uint32_t)x68 & 0x3ffffff);
+{ uint64_t x71 = (x46 + (0xf * x69)); // carry out of the last accumulator re-enters the first limb scaled by 0xf
+{ uint32_t x72 = (uint32_t) (x71 >> 0x1b);
+{ uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
+{ uint32_t x74 = (x72 + x49);
+{ uint32_t x75 = (x74 >> 0x1a);
+{ uint32_t x76 = (x74 & 0x3ffffff);
+out[0] = x70; // out[0] comes from the last accumulator carried (x36), out[8] from the first (x44)
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52; // final small carry is added without a further mask
+out[7] = x76;
+out[8] = x73;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e235m15/femul.h b/src/Specific/solinas32_2e235m15/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas32_2e235m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21); // Prototype for femul: an output pointer followed by two groups of nine uint64_t limbs (x18..x5, then x34..x21); the definition in femul.c stores out[0..8].
diff --git a/src/Specific/solinas32_2e235m15/fesquare.c b/src/Specific/solinas32_2e235m15/fesquare.c
new file mode 100644
index 000000000..0e06fc098
--- /dev/null
+++ b/src/Specific/solinas32_2e235m15/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 9-limb value and stores 9 carried limbs in out. In the column sums below x2 is the lowest-weight limb, then x4, x6, ..., x14, x16, and x15 is the highest.
+{ uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2))))))))); // top column (limb 8), no wrapped terms. NOTE(review): the 0x2 factors presumably compensate for the mixed 27/26-bit limb widths — confirm against the generator
+{ uint64_t x18 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0xf * ((uint64_t)x15 * x15))); // limb 7; terms that overflow past the top limb are folded back with a 0xf multiplier (presumably reduction mod 2^235 - 15, per the directory name)
+{ uint64_t x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xf * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
+{ uint64_t x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xf * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
+{ uint64_t x21 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0xf * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+{ uint64_t x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xf * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
+{ uint64_t x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+{ uint64_t x25 = (((uint64_t)x2 * x2) + (0xf * ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4))))))))))); // bottom column (limb 0): every cross term wraps and carries the 0xf multiplier
+{ uint64_t x26 = (x25 >> 0x1b); // carry out of limb 0, which keeps 27 bits
+{ uint32_t x27 = ((uint32_t)x25 & 0x7ffffff); // low 27 bits of limb 0, before the final wrap-around below
+{ uint64_t x28 = (x26 + x24); // limbs 1..8 follow the same add-carry / shift / mask pattern with 26-bit width
+{ uint64_t x29 = (x28 >> 0x1a);
+{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
+{ uint64_t x31 = (x29 + x23);
+{ uint64_t x32 = (x31 >> 0x1a);
+{ uint32_t x33 = ((uint32_t)x31 & 0x3ffffff);
+{ uint64_t x34 = (x32 + x22);
+{ uint64_t x35 = (x34 >> 0x1a);
+{ uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
+{ uint64_t x37 = (x35 + x21);
+{ uint64_t x38 = (x37 >> 0x1a);
+{ uint32_t x39 = ((uint32_t)x37 & 0x3ffffff);
+{ uint64_t x40 = (x38 + x20);
+{ uint64_t x41 = (x40 >> 0x1a);
+{ uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
+{ uint64_t x43 = (x41 + x19);
+{ uint64_t x44 = (x43 >> 0x1a);
+{ uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
+{ uint64_t x46 = (x44 + x18);
+{ uint64_t x47 = (x46 >> 0x1a);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+{ uint64_t x49 = (x47 + x17);
+{ uint64_t x50 = (x49 >> 0x1a); // carry out of the top limb
+{ uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
+{ uint64_t x52 = (x27 + (0xf * x50)); // fold the top carry back into limb 0, scaled by 0xf
+{ uint32_t x53 = (uint32_t) (x52 >> 0x1b); // second, short carry pass: limb 0 -> limb 1
+{ uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
+{ uint32_t x55 = (x53 + x30);
+{ uint32_t x56 = (x55 >> 0x1a); // ...and limb 1 -> limb 2
+{ uint32_t x57 = (x55 & 0x3ffffff);
+out[0] = x51; // out[] is written highest limb first: out[0] is limb 8, out[8] is limb 0
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33; // limb 2 absorbs the last carry and is not masked again
+out[7] = x57;
+out[8] = x54;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e235m15/fesquare.h b/src/Specific/solinas32_2e235m15/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas32_2e235m15/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: result pointer followed by nine uint64_t limb arguments; fesquare.c notes "caller: uint64_t out[9]".
diff --git a/src/Specific/solinas32_2e235m15/freeze.c b/src/Specific/solinas32_2e235m15/freeze.c
new file mode 100644
index 000000000..705f35e6e
--- /dev/null
+++ b/src/Specific/solinas32_2e235m15/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening brace follows this signature, and the lines below are not valid C.
+out[0] = uint32_t x18; // NOTE(review): a declaration appears where an expression is required
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): raw "Op Syntax.SubWithGetBorrow ..." text; presumably an operation the code printer failed to render — regenerate rather than hand-edit
+out[2] = x2;
+out[3] = 0x7fffff1;; // stray second semicolon as emitted
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e235m15/freeze.h b/src/Specific/solinas32_2e235m15/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas32_2e235m15/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: result pointer followed by nine uint64_t limb arguments. NOTE(review): the body emitted in freeze.c is not valid C, so the required size of out cannot be confirmed from it.
diff --git a/src/Specific/solinas32_2e243m9/femul.c b/src/Specific/solinas32_2e243m9/femul.c
new file mode 100644
index 000000000..883d95b41
--- /dev/null
+++ b/src/Specific/solinas32_2e243m9/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21) // Multiplies two 9-limb values and stores 9 carried limbs in out. First operand, lowest limb to highest: x5, x7, ..., x17, x19, x18; second operand: x21, x23, ..., x33, x35, x34.
+{ uint64_t x36 = (((uint64_t)x5 * x34) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + (((uint64_t)x19 * x23) + ((uint64_t)x18 * x21))))))))); // top column (limb 8), no wrapped terms
+{ uint64_t x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) + (0x9 * ((uint64_t)x18 * x34))); // limb 7; terms that overflow past the top limb are folded back with a 0x9 multiplier (presumably reduction mod 2^243 - 9, per the directory name)
+{ uint64_t x38 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + (((uint64_t)x15 * x23) + ((uint64_t)x17 * x21))))))) + (0x9 * (((uint64_t)x19 * x34) + ((uint64_t)x18 * x35))));
+{ uint64_t x39 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + (((uint64_t)x13 * x23) + ((uint64_t)x15 * x21)))))) + (0x9 * (((uint64_t)x17 * x34) + (((uint64_t)x19 * x35) + ((uint64_t)x18 * x33)))));
+{ uint64_t x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) + (0x9 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))));
+{ uint64_t x41 = ((((uint64_t)x5 * x27) + (((uint64_t)x7 * x25) + (((uint64_t)x9 * x23) + ((uint64_t)x11 * x21)))) + (0x9 * (((uint64_t)x13 * x34) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x18 * x29)))))));
+{ uint64_t x42 = ((((uint64_t)x5 * x25) + (((uint64_t)x7 * x23) + ((uint64_t)x9 * x21))) + (0x9 * (((uint64_t)x11 * x34) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x18 * x27))))))));
+{ uint64_t x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) + (0x9 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))));
+{ uint64_t x44 = (((uint64_t)x5 * x21) + (0x9 * (((uint64_t)x7 * x34) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + (((uint64_t)x17 * x27) + (((uint64_t)x19 * x25) + ((uint64_t)x18 * x23)))))))))); // bottom column (limb 0): every cross term wraps and carries the 0x9 multiplier
+{ uint64_t x45 = (x44 >> 0x1b); // carry chain: every limb keeps 27 bits (shift 0x1b, mask 0x7ffffff)
+{ uint32_t x46 = ((uint32_t)x44 & 0x7ffffff); // low 27 bits of limb 0, before the final wrap-around below
+{ uint64_t x47 = (x45 + x43);
+{ uint64_t x48 = (x47 >> 0x1b);
+{ uint32_t x49 = ((uint32_t)x47 & 0x7ffffff);
+{ uint64_t x50 = (x48 + x42);
+{ uint64_t x51 = (x50 >> 0x1b);
+{ uint32_t x52 = ((uint32_t)x50 & 0x7ffffff);
+{ uint64_t x53 = (x51 + x41);
+{ uint64_t x54 = (x53 >> 0x1b);
+{ uint32_t x55 = ((uint32_t)x53 & 0x7ffffff);
+{ uint64_t x56 = (x54 + x40);
+{ uint64_t x57 = (x56 >> 0x1b);
+{ uint32_t x58 = ((uint32_t)x56 & 0x7ffffff);
+{ uint64_t x59 = (x57 + x39);
+{ uint64_t x60 = (x59 >> 0x1b);
+{ uint32_t x61 = ((uint32_t)x59 & 0x7ffffff);
+{ uint64_t x62 = (x60 + x38);
+{ uint64_t x63 = (x62 >> 0x1b);
+{ uint32_t x64 = ((uint32_t)x62 & 0x7ffffff);
+{ uint64_t x65 = (x63 + x37);
+{ uint64_t x66 = (x65 >> 0x1b);
+{ uint32_t x67 = ((uint32_t)x65 & 0x7ffffff);
+{ uint64_t x68 = (x66 + x36);
+{ uint64_t x69 = (x68 >> 0x1b); // carry out of the top limb
+{ uint32_t x70 = ((uint32_t)x68 & 0x7ffffff);
+{ uint64_t x71 = (x46 + (0x9 * x69)); // fold the top carry back into limb 0, scaled by 0x9
+{ uint32_t x72 = (uint32_t) (x71 >> 0x1b); // second, short carry pass: limb 0 -> limb 1
+{ uint32_t x73 = ((uint32_t)x71 & 0x7ffffff);
+{ uint32_t x74 = (x72 + x49);
+{ uint32_t x75 = (x74 >> 0x1b); // ...and limb 1 -> limb 2
+{ uint32_t x76 = (x74 & 0x7ffffff);
+out[0] = x70; // out[] is written highest limb first: out[0] is limb 8, out[8] is limb 0
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52; // limb 2 absorbs the last carry and is not masked again
+out[7] = x76;
+out[8] = x73;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e243m9/femul.h b/src/Specific/solinas32_2e243m9/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas32_2e243m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21); // Prototype: result pointer, then nine uint64_t limbs per operand (x18..x5 and x34..x21); femul.c notes "caller: uint64_t out[9]".
diff --git a/src/Specific/solinas32_2e243m9/fesquare.c b/src/Specific/solinas32_2e243m9/fesquare.c
new file mode 100644
index 000000000..48d5c095a
--- /dev/null
+++ b/src/Specific/solinas32_2e243m9/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 9-limb value and stores 9 carried limbs in out. In the column sums below x2 is the lowest-weight limb, then x4, x6, ..., x14, x16, and x15 is the highest.
+{ uint64_t x17 = (((uint64_t)x2 * x15) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x15 * x2))))))))); // top column (limb 8), no wrapped terms
+{ uint64_t x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x9 * ((uint64_t)x15 * x15))); // limb 7; terms that overflow past the top limb are folded back with a 0x9 multiplier (presumably reduction mod 2^243 - 9, per the directory name)
+{ uint64_t x19 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x15) + ((uint64_t)x15 * x16))));
+{ uint64_t x20 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x15) + (((uint64_t)x16 * x16) + ((uint64_t)x15 * x14)))));
+{ uint64_t x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))));
+{ uint64_t x22 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x15) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + ((uint64_t)x15 * x10)))))));
+{ uint64_t x23 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x15) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + ((uint64_t)x15 * x8))))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))));
+{ uint64_t x25 = (((uint64_t)x2 * x2) + (0x9 * (((uint64_t)x4 * x15) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + ((uint64_t)x15 * x4)))))))))); // bottom column (limb 0): every cross term wraps and carries the 0x9 multiplier
+{ uint64_t x26 = (x25 >> 0x1b); // carry chain: every limb keeps 27 bits (shift 0x1b, mask 0x7ffffff)
+{ uint32_t x27 = ((uint32_t)x25 & 0x7ffffff); // low 27 bits of limb 0, before the final wrap-around below
+{ uint64_t x28 = (x26 + x24);
+{ uint64_t x29 = (x28 >> 0x1b);
+{ uint32_t x30 = ((uint32_t)x28 & 0x7ffffff);
+{ uint64_t x31 = (x29 + x23);
+{ uint64_t x32 = (x31 >> 0x1b);
+{ uint32_t x33 = ((uint32_t)x31 & 0x7ffffff);
+{ uint64_t x34 = (x32 + x22);
+{ uint64_t x35 = (x34 >> 0x1b);
+{ uint32_t x36 = ((uint32_t)x34 & 0x7ffffff);
+{ uint64_t x37 = (x35 + x21);
+{ uint64_t x38 = (x37 >> 0x1b);
+{ uint32_t x39 = ((uint32_t)x37 & 0x7ffffff);
+{ uint64_t x40 = (x38 + x20);
+{ uint64_t x41 = (x40 >> 0x1b);
+{ uint32_t x42 = ((uint32_t)x40 & 0x7ffffff);
+{ uint64_t x43 = (x41 + x19);
+{ uint64_t x44 = (x43 >> 0x1b);
+{ uint32_t x45 = ((uint32_t)x43 & 0x7ffffff);
+{ uint64_t x46 = (x44 + x18);
+{ uint64_t x47 = (x46 >> 0x1b);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7ffffff);
+{ uint64_t x49 = (x47 + x17);
+{ uint64_t x50 = (x49 >> 0x1b); // carry out of the top limb
+{ uint32_t x51 = ((uint32_t)x49 & 0x7ffffff);
+{ uint64_t x52 = (x27 + (0x9 * x50)); // fold the top carry back into limb 0, scaled by 0x9
+{ uint32_t x53 = (uint32_t) (x52 >> 0x1b); // second, short carry pass: limb 0 -> limb 1
+{ uint32_t x54 = ((uint32_t)x52 & 0x7ffffff);
+{ uint32_t x55 = (x53 + x30);
+{ uint32_t x56 = (x55 >> 0x1b); // ...and limb 1 -> limb 2
+{ uint32_t x57 = (x55 & 0x7ffffff);
+out[0] = x51; // out[] is written highest limb first: out[0] is limb 8, out[8] is limb 0
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33; // limb 2 absorbs the last carry and is not masked again
+out[7] = x57;
+out[8] = x54;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e243m9/fesquare.h b/src/Specific/solinas32_2e243m9/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas32_2e243m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: result pointer followed by nine uint64_t limb arguments; fesquare.c notes "caller: uint64_t out[9]".
diff --git a/src/Specific/solinas32_2e243m9/freeze.c b/src/Specific/solinas32_2e243m9/freeze.c
new file mode 100644
index 000000000..81cc1a512
--- /dev/null
+++ b/src/Specific/solinas32_2e243m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening brace follows this signature, and the lines below are not valid C.
+out[0] = uint32_t x18; // NOTE(review): a declaration appears where an expression is required
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 27 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): raw "Op Syntax.SubWithGetBorrow ..." text; presumably an operation the code printer failed to render — regenerate rather than hand-edit
+out[2] = x2;
+out[3] = 0x7fffff7;; // stray second semicolon as emitted
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e243m9/freeze.h b/src/Specific/solinas32_2e243m9/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas32_2e243m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: result pointer followed by nine uint64_t limb arguments. NOTE(review): the body emitted in freeze.c is not valid C, so the required size of out cannot be confirmed from it.
diff --git a/src/Specific/solinas32_2e251m9/femul.c b/src/Specific/solinas32_2e251m9/femul.c
new file mode 100644
index 000000000..f941d0c36
--- /dev/null
+++ b/src/Specific/solinas32_2e251m9/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23) // Multiplies two 10-limb values and stores 10 carried limbs in out. First operand, lowest limb to highest: x5, x7, ..., x19, x21, x20; second operand: x23, x25, ..., x37, x39, x38.
+{ uint64_t x40 = (((uint64_t)x5 * x38) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((0x2 * ((uint64_t)x11 * x35)) + ((0x2 * ((uint64_t)x13 * x33)) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((0x2 * ((uint64_t)x19 * x27)) + ((0x2 * ((uint64_t)x21 * x25)) + ((uint64_t)x20 * x23)))))))))); // top column (limb 9), no wrapped terms. NOTE(review): the 0x2 factors presumably compensate for the mixed 26/25-bit limb widths — confirm against the generator
+{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((0x2 * ((uint64_t)x17 * x27)) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x9 * ((uint64_t)x20 * x38))); // limb 8; terms that overflow past the top limb are folded back with a 0x9 multiplier (presumably reduction mod 2^251 - 9, per the directory name)
+{ uint64_t x42 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((0x2 * ((uint64_t)x15 * x27)) + ((0x2 * ((uint64_t)x17 * x25)) + ((uint64_t)x19 * x23)))))))) + (0x9 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+{ uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((0x2 * ((uint64_t)x13 * x27)) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x9 * (((uint64_t)x19 * x38) + (((uint64_t)x21 * x39) + ((uint64_t)x20 * x37)))));
+{ uint64_t x44 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((0x2 * ((uint64_t)x11 * x27)) + ((0x2 * ((uint64_t)x13 * x25)) + ((uint64_t)x15 * x23)))))) + (0x9 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((0x2 * ((uint64_t)x9 * x27)) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x9 * (((uint64_t)x15 * x38) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x20 * x33)))))));
+{ uint64_t x46 = ((((uint64_t)x5 * x29) + ((0x2 * ((uint64_t)x7 * x27)) + ((0x2 * ((uint64_t)x9 * x25)) + ((uint64_t)x11 * x23)))) + (0x9 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x9 * (((uint64_t)x11 * x38) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x20 * x29)))))))));
+{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x9 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x9 * ((0x2 * ((uint64_t)x7 * x38)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((0x2 * ((uint64_t)x13 * x35)) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((0x2 * ((uint64_t)x21 * x27)) + (0x2 * ((uint64_t)x20 * x25)))))))))))); // bottom column (limb 0): every cross term wraps and carries the 0x9 multiplier
+{ uint64_t x50 = (x49 >> 0x1a); // carry out of limb 0, which keeps 26 bits
+{ uint32_t x51 = ((uint32_t)x49 & 0x3ffffff); // low 26 bits of limb 0, before the final wrap-around below
+{ uint64_t x52 = (x50 + x48); // limbs 1..9 follow the same add-carry / shift / mask pattern with 25-bit width
+{ uint64_t x53 = (x52 >> 0x19);
+{ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+{ uint64_t x55 = (x53 + x47);
+{ uint64_t x56 = (x55 >> 0x19);
+{ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+{ uint64_t x58 = (x56 + x46);
+{ uint64_t x59 = (x58 >> 0x19);
+{ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+{ uint64_t x61 = (x59 + x45);
+{ uint64_t x62 = (x61 >> 0x19);
+{ uint32_t x63 = ((uint32_t)x61 & 0x1ffffff);
+{ uint64_t x64 = (x62 + x44);
+{ uint64_t x65 = (x64 >> 0x19);
+{ uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
+{ uint64_t x67 = (x65 + x43);
+{ uint64_t x68 = (x67 >> 0x19);
+{ uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
+{ uint64_t x70 = (x68 + x42);
+{ uint64_t x71 = (x70 >> 0x19);
+{ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+{ uint64_t x73 = (x71 + x41);
+{ uint64_t x74 = (x73 >> 0x19);
+{ uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
+{ uint64_t x76 = (x74 + x40);
+{ uint64_t x77 = (x76 >> 0x19); // carry out of the top limb
+{ uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
+{ uint64_t x79 = (x51 + (0x9 * x77)); // fold the top carry back into limb 0, scaled by 0x9
+{ uint32_t x80 = (uint32_t) (x79 >> 0x1a); // second, short carry pass: limb 0 -> limb 1
+{ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+{ uint32_t x82 = (x80 + x54);
+{ uint32_t x83 = (x82 >> 0x19); // ...and limb 1 -> limb 2
+{ uint32_t x84 = (x82 & 0x1ffffff);
+out[0] = x78; // out[] is written highest limb first: out[0] is limb 9, out[9] is limb 0
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57; // limb 2 absorbs the last carry and is not masked again
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e251m9/femul.h b/src/Specific/solinas32_2e251m9/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas32_2e251m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23); // Prototype: result pointer, then ten uint64_t limbs per operand (x20..x5 and x38..x23); femul.c notes "caller: uint64_t out[10]".
diff --git a/src/Specific/solinas32_2e251m9/fesquare.c b/src/Specific/solinas32_2e251m9/fesquare.c
new file mode 100644
index 000000000..aba6173ef
--- /dev/null
+++ b/src/Specific/solinas32_2e251m9/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 10-limb value and stores 10 carried limbs in out. In the column sums below x2 is the lowest-weight limb, then x4, x6, ..., x16, x18, and x17 is the highest.
+{ uint64_t x19 = (((uint64_t)x2 * x17) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x17 * x2)))))))))); // top column (limb 9), no wrapped terms. NOTE(review): the 0x2 factors presumably compensate for the mixed 26/25-bit limb widths — confirm against the generator
+{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x9 * ((uint64_t)x17 * x17))); // limb 8; terms that overflow past the top limb are folded back with a 0x9 multiplier (presumably reduction mod 2^251 - 9, per the directory name)
+{ uint64_t x21 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+{ uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x17) + (((uint64_t)x18 * x18) + ((uint64_t)x17 * x16)))));
+{ uint64_t x23 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x17) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((uint64_t)x17 * x12)))))));
+{ uint64_t x25 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x17) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + ((uint64_t)x17 * x8)))))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x17)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + (0x2 * ((uint64_t)x17 * x4)))))))))))); // bottom column (limb 0): every cross term wraps and carries the 0x9 multiplier
+{ uint64_t x29 = (x28 >> 0x1a); // carry out of limb 0, which keeps 26 bits
+{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff); // low 26 bits of limb 0, before the final wrap-around below
+{ uint64_t x31 = (x29 + x27); // limbs 1..9 follow the same add-carry / shift / mask pattern with 25-bit width
+{ uint64_t x32 = (x31 >> 0x19);
+{ uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
+{ uint64_t x34 = (x32 + x26);
+{ uint64_t x35 = (x34 >> 0x19);
+{ uint32_t x36 = ((uint32_t)x34 & 0x1ffffff);
+{ uint64_t x37 = (x35 + x25);
+{ uint64_t x38 = (x37 >> 0x19);
+{ uint32_t x39 = ((uint32_t)x37 & 0x1ffffff);
+{ uint64_t x40 = (x38 + x24);
+{ uint64_t x41 = (x40 >> 0x19);
+{ uint32_t x42 = ((uint32_t)x40 & 0x1ffffff);
+{ uint64_t x43 = (x41 + x23);
+{ uint64_t x44 = (x43 >> 0x19);
+{ uint32_t x45 = ((uint32_t)x43 & 0x1ffffff);
+{ uint64_t x46 = (x44 + x22);
+{ uint64_t x47 = (x46 >> 0x19);
+{ uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
+{ uint64_t x49 = (x47 + x21);
+{ uint64_t x50 = (x49 >> 0x19);
+{ uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
+{ uint64_t x52 = (x50 + x20);
+{ uint64_t x53 = (x52 >> 0x19);
+{ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+{ uint64_t x55 = (x53 + x19);
+{ uint64_t x56 = (x55 >> 0x19); // carry out of the top limb
+{ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+{ uint64_t x58 = (x30 + (0x9 * x56)); // fold the top carry back into limb 0, scaled by 0x9
+{ uint32_t x59 = (uint32_t) (x58 >> 0x1a); // second, short carry pass: limb 0 -> limb 1
+{ uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
+{ uint32_t x61 = (x59 + x33);
+{ uint32_t x62 = (x61 >> 0x19); // ...and limb 1 -> limb 2
+{ uint32_t x63 = (x61 & 0x1ffffff);
+out[0] = x57; // out[] is written highest limb first: out[0] is limb 9, out[9] is limb 0
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36; // limb 2 absorbs the last carry and is not masked again
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e251m9/fesquare.h b/src/Specific/solinas32_2e251m9/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas32_2e251m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: result pointer followed by ten uint64_t limb arguments; fesquare.c notes "caller: uint64_t out[10]".
diff --git a/src/Specific/solinas32_2e251m9/freeze.c b/src/Specific/solinas32_2e251m9/freeze.c
new file mode 100644
index 000000000..3ad3614f2
--- /dev/null
+++ b/src/Specific/solinas32_2e251m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening brace follows this signature, and the lines below are not valid C.
+out[0] = uint32_t x20; // NOTE(review): a declaration appears where an expression is required
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): raw "Op Syntax.SubWithGetBorrow ..." text; presumably an operation the code printer failed to render — regenerate rather than hand-edit
+out[2] = x2;
+out[3] = 0x3fffff7;; // stray second semicolon as emitted
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e251m9/freeze.h b/src/Specific/solinas32_2e251m9/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas32_2e251m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze.c. NOTE(review): the definition committed in freeze.c is not valid C, so nothing can link against this yet.
diff --git a/src/Specific/solinas32_2e254m127x2e240m1/freeze.c b/src/Specific/solinas32_2e254m127x2e240m1/freeze.c
new file mode 100644
index 000000000..34c2de5ab
--- /dev/null
+++ b/src/Specific/solinas32_2e254m127x2e240m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C as committed - there is no opening '{' and the first two statements are raw generator text, so this file cannot compile.
+out[0] = uint32_t x22; // NOTE(review): a declaration used as an expression; x22 is never defined
+out[1] = uint8_t x23 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted subtract-with-borrow node from the code generator - confirm and regenerate
+out[2] = x2; // copies the last input word unchanged
+out[3] = 0xffffff;; // constant equals 2^24 - 1 (presumably the low limb of the modulus - confirm); the second ';' is an empty statement
+} // closes a block that was never opened
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e254m127x2e240m1/freeze.h b/src/Specific/solinas32_2e254m127x2e240m1/freeze.h
new file mode 100644
index 000000000..ccf1cb263
--- /dev/null
+++ b/src/Specific/solinas32_2e254m127x2e240m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x19, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze.c. NOTE(review): the definition committed in freeze.c is not valid C, so nothing can link against this yet.
diff --git a/src/Specific/solinas32_2e255m19/femul.c b/src/Specific/solinas32_2e255m19/femul.c
new file mode 100644
index 000000000..d73360a88
--- /dev/null
+++ b/src/Specific/solinas32_2e255m19/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23) // Multiplies two ten-word operands (x5..x20 and x23..x38) and stores ten reduced words in out[0..9]. Words alternate between 26-bit and 25-bit masks; presumably this is multiplication mod 2^255 - 19, per the directory name - confirm.
+{ uint64_t x40 = (((uint64_t)x5 * x38) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + (((uint64_t)x21 * x25) + ((uint64_t)x20 * x23)))))))))); // top column: the only one with no 0x13-scaled wrap-around terms
+{ uint64_t x41 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + ((0x2 * ((uint64_t)x11 * x33)) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + ((uint64_t)x21 * x23))))))))) + (0x13 * (0x2 * ((uint64_t)x20 * x38))));
+{ uint64_t x42 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((uint64_t)x19 * x23)))))))) + (0x13 * (((uint64_t)x21 * x38) + ((uint64_t)x20 * x39))));
+{ uint64_t x43 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + (((uint64_t)x9 * x31) + ((0x2 * ((uint64_t)x11 * x29)) + (((uint64_t)x13 * x27) + ((0x2 * ((uint64_t)x15 * x25)) + ((uint64_t)x17 * x23))))))) + (0x13 * ((0x2 * ((uint64_t)x19 * x38)) + (((uint64_t)x21 * x39) + (0x2 * ((uint64_t)x20 * x37))))));
+{ uint64_t x44 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + (((uint64_t)x11 * x27) + (((uint64_t)x13 * x25) + ((uint64_t)x15 * x23)))))) + (0x13 * (((uint64_t)x17 * x38) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x20 * x35))))));
+{ uint64_t x45 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + ((0x2 * ((uint64_t)x11 * x25)) + ((uint64_t)x13 * x23))))) + (0x13 * ((0x2 * ((uint64_t)x15 * x38)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + (0x2 * ((uint64_t)x20 * x33))))))));
+{ uint64_t x46 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + ((uint64_t)x11 * x23)))) + (0x13 * (((uint64_t)x13 * x38) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x20 * x31))))))));
+{ uint64_t x47 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((uint64_t)x9 * x23))) + (0x13 * ((0x2 * ((uint64_t)x11 * x38)) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + (((uint64_t)x21 * x31) + (0x2 * ((uint64_t)x20 * x29))))))))));
+{ uint64_t x48 = ((((uint64_t)x5 * x25) + ((uint64_t)x7 * x23)) + (0x13 * (((uint64_t)x9 * x38) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x20 * x27))))))))));
+{ uint64_t x49 = (((uint64_t)x5 * x23) + (0x13 * ((0x2 * ((uint64_t)x7 * x38)) + (((uint64_t)x9 * x39) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + (((uint64_t)x21 * x27) + (0x2 * ((uint64_t)x20 * x25)))))))))))); // lowest column: x5*x23 plus 0x13 times the wrapped products
+{ uint64_t x50 = (x49 >> 0x1a); // carry chain starts here: split the lowest column at 26 bits
+{ uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
+{ uint64_t x52 = (x50 + x48);
+{ uint64_t x53 = (x52 >> 0x19); // the next column is split at 25 bits; the widths alternate 26/25 from here on
+{ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+{ uint64_t x55 = (x53 + x47);
+{ uint64_t x56 = (x55 >> 0x1a);
+{ uint32_t x57 = ((uint32_t)x55 & 0x3ffffff);
+{ uint64_t x58 = (x56 + x46);
+{ uint64_t x59 = (x58 >> 0x19);
+{ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+{ uint64_t x61 = (x59 + x45);
+{ uint64_t x62 = (x61 >> 0x1a);
+{ uint32_t x63 = ((uint32_t)x61 & 0x3ffffff);
+{ uint64_t x64 = (x62 + x44);
+{ uint64_t x65 = (x64 >> 0x19);
+{ uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
+{ uint64_t x67 = (x65 + x43);
+{ uint64_t x68 = (x67 >> 0x1a);
+{ uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
+{ uint64_t x70 = (x68 + x42);
+{ uint64_t x71 = (x70 >> 0x19);
+{ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+{ uint64_t x73 = (x71 + x41);
+{ uint64_t x74 = (x73 >> 0x1a);
+{ uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
+{ uint64_t x76 = (x74 + x40);
+{ uint64_t x77 = (x76 >> 0x19);
+{ uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
+{ uint64_t x79 = (x51 + (0x13 * x77)); // carry out of the top word is multiplied by 0x13 and folded back into the lowest word
+{ uint32_t x80 = (uint32_t) (x79 >> 0x1a);
+{ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+{ uint32_t x82 = (x80 + x54);
+{ uint32_t x83 = (x82 >> 0x19);
+{ uint32_t x84 = (x82 & 0x1ffffff);
+out[0] = x78; // words are stored top word first; out[9] holds the lowest word
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57; // the last carry (x83) is added without re-masking, so this word may exceed 26 bits
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e255m19/femul.h b/src/Specific/solinas32_2e255m19/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas32_2e255m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23); // Prototype for the definition in femul.c; per the "caller" note there, out must point to uint64_t[10]. x5..x20 and x23..x38 are the two ten-word operands.
diff --git a/src/Specific/solinas32_2e255m19/fesquare.c b/src/Specific/solinas32_2e255m19/fesquare.c
new file mode 100644
index 000000000..95a421f6c
--- /dev/null
+++ b/src/Specific/solinas32_2e255m19/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one ten-word operand (x2..x17) and stores ten reduced words in out[0..9]. Every product pairs the operand with itself; words alternate between 26-bit and 25-bit masks. Presumably squaring mod 2^255 - 19, per the directory name - confirm.
+{ uint64_t x19 = (((uint64_t)x2 * x17) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x17 * x2)))))))))); // top column: the only one with no 0x13-scaled wrap-around terms; symmetric products are not merged
+{ uint64_t x20 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * (0x2 * ((uint64_t)x17 * x17))));
+{ uint64_t x21 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x17) + ((uint64_t)x17 * x18))));
+{ uint64_t x22 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + ((0x2 * ((uint64_t)x8 * x8)) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * ((0x2 * ((uint64_t)x16 * x17)) + (((uint64_t)x18 * x18) + (0x2 * ((uint64_t)x17 * x16))))));
+{ uint64_t x23 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x17) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((uint64_t)x17 * x14))))));
+{ uint64_t x24 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + (((uint64_t)x6 * x6) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * ((0x2 * ((uint64_t)x12 * x17)) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (0x2 * ((uint64_t)x17 * x12))))))));
+{ uint64_t x25 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x17) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + ((uint64_t)x17 * x10))))))));
+{ uint64_t x26 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * ((0x2 * ((uint64_t)x8 * x17)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + (0x2 * ((uint64_t)x17 * x8))))))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x17) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((uint64_t)x17 * x6))))))))));
+{ uint64_t x28 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x17)) + (((uint64_t)x6 * x18) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + (((uint64_t)x18 * x6) + (0x2 * ((uint64_t)x17 * x4)))))))))))); // lowest column: x2*x2 plus 0x13 times the wrapped products
+{ uint64_t x29 = (x28 >> 0x1a); // carry chain starts here: split the lowest column at 26 bits
+{ uint32_t x30 = ((uint32_t)x28 & 0x3ffffff);
+{ uint64_t x31 = (x29 + x27);
+{ uint64_t x32 = (x31 >> 0x19); // the next column is split at 25 bits; the widths alternate 26/25 from here on
+{ uint32_t x33 = ((uint32_t)x31 & 0x1ffffff);
+{ uint64_t x34 = (x32 + x26);
+{ uint64_t x35 = (x34 >> 0x1a);
+{ uint32_t x36 = ((uint32_t)x34 & 0x3ffffff);
+{ uint64_t x37 = (x35 + x25);
+{ uint64_t x38 = (x37 >> 0x19);
+{ uint32_t x39 = ((uint32_t)x37 & 0x1ffffff);
+{ uint64_t x40 = (x38 + x24);
+{ uint64_t x41 = (x40 >> 0x1a);
+{ uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
+{ uint64_t x43 = (x41 + x23);
+{ uint64_t x44 = (x43 >> 0x19);
+{ uint32_t x45 = ((uint32_t)x43 & 0x1ffffff);
+{ uint64_t x46 = (x44 + x22);
+{ uint64_t x47 = (x46 >> 0x1a);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+{ uint64_t x49 = (x47 + x21);
+{ uint64_t x50 = (x49 >> 0x19);
+{ uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
+{ uint64_t x52 = (x50 + x20);
+{ uint64_t x53 = (x52 >> 0x1a);
+{ uint32_t x54 = ((uint32_t)x52 & 0x3ffffff);
+{ uint64_t x55 = (x53 + x19);
+{ uint64_t x56 = (x55 >> 0x19);
+{ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+{ uint64_t x58 = (x30 + (0x13 * x56)); // carry out of the top word is multiplied by 0x13 and folded back into the lowest word
+{ uint32_t x59 = (uint32_t) (x58 >> 0x1a);
+{ uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
+{ uint32_t x61 = (x59 + x33);
+{ uint32_t x62 = (x61 >> 0x19);
+{ uint32_t x63 = (x61 & 0x1ffffff);
+out[0] = x57; // words are stored top word first; out[9] holds the lowest word
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36; // the last carry (x62) is added without re-masking, so this word may exceed 26 bits
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas32_2e255m19/fesquare.h b/src/Specific/solinas32_2e255m19/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas32_2e255m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the definition in fesquare.c; per the "caller" note there, out must point to uint64_t[10]. x2..x17 are the ten input words.
diff --git a/src/Specific/solinas32_2e255m19/freeze.c b/src/Specific/solinas32_2e255m19/freeze.c
new file mode 100644
index 000000000..8ee39b49f
--- /dev/null
+++ b/src/Specific/solinas32_2e255m19/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C as committed - there is no opening '{' and the first two statements are raw generator text, so this file cannot compile.
+out[0] = uint32_t x20; // NOTE(review): a declaration used as an expression; x20 is never defined
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted subtract-with-borrow node from the code generator - confirm and regenerate
+out[2] = x2; // copies the last input word unchanged
+out[3] = 0x3ffffed;; // constant equals 2^26 - 19 (presumably the low limb of the modulus - confirm); the second ';' is an empty statement
+} // closes a block that was never opened
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e255m19/freeze.h b/src/Specific/solinas32_2e255m19/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas32_2e255m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze.c. NOTE(review): the definition committed in freeze.c is not valid C, so nothing can link against this yet.
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/femul.c b/src/Specific/solinas32_2e255m2e4m2e1m1/femul.c
new file mode 100644
index 000000000..58eeb52fd
--- /dev/null
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/femul.c
@@ -0,0 +1,84 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21) // Multiplies two nine-word operands (x5..x18 and x21..x34) and stores nine words in out[0..8]; word masks are 29 or 28 bits. NOTE(review): the body uses the tokens 'ℤ', '+ℤ' and '*ℤ', which are not C, so this file cannot compile as committed - presumably the generator could not fit those intermediates into a machine word; confirm and regenerate.
+{ uint64_t x36 = (((uint64_t)x5 * x34) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + (((uint64_t)x15 * x27) + (((uint64_t)x17 * x25) + ((0x2 * ((uint64_t)x19 * x23)) + ((uint64_t)x18 * x21))))))))); // top column: the only one typed uint64_t and the only one with no wrap-around terms
+{ ℤ x37 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + (((uint64_t)x13 * x27) + (((uint64_t)x15 * x25) + (((uint64_t)x17 * x23) + ((uint64_t)x19 * x21)))))))) +ℤ (((uint64_t)x18 * x34) + ((0x2 * ((uint64_t)x18 * x34)) + (0x10 * ((uint64_t)x18 * x34))))); // wrap term has the form v + 2*v + 16*v, i.e. 19*v
+{ ℤ x38 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + (((uint64_t)x11 * x27) + ((0x2 * ((uint64_t)x13 * x25)) + ((0x2 * ((uint64_t)x15 * x23)) + ((uint64_t)x17 * x21))))))) +ℤ (((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35))) +ℤ ((0x2 * ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x19 * x34)) + (0x2 * ((uint64_t)x18 * x35)))))));
+{ ℤ x39 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + (((uint64_t)x9 * x27) + (((uint64_t)x11 * x25) + ((0x2 * ((uint64_t)x13 * x23)) + ((uint64_t)x15 * x21)))))) +ℤ ((((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33))) +ℤ ((0x2 * (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))) +ℤ (0x10 *ℤ (((uint64_t)x17 * x34) + ((0x2 * ((uint64_t)x19 * x35)) + ((uint64_t)x18 * x33)))))));
+{ ℤ x40 = ((((uint64_t)x5 * x29) + (((uint64_t)x7 * x27) + (((uint64_t)x9 * x25) + (((uint64_t)x11 * x23) + ((uint64_t)x13 * x21))))) +ℤ ((((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31)))) +ℤ ((0x2 * (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))) +ℤ (0x10 *ℤ (((uint64_t)x15 * x34) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x18 * x31))))))));
+{ ℤ x41 = ((((uint64_t)x5 * x27) + ((0x2 * ((uint64_t)x7 * x25)) + ((0x2 * ((uint64_t)x9 * x23)) + ((uint64_t)x11 * x21)))) +ℤ (((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29)))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x13 * x34)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + (0x2 * ((uint64_t)x18 * x29))))))))));
+{ ℤ x42 = ((((uint64_t)x5 * x25) + ((0x2 * ((uint64_t)x7 * x23)) + ((uint64_t)x9 * x21))) +ℤ ((((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27)))))) +ℤ ((0x2 *ℤ (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))) +ℤ (0x10 *ℤ (((uint64_t)x11 * x34) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x18 * x27))))))))));
+{ ℤ x43 = ((((uint64_t)x5 * x23) + ((uint64_t)x7 * x21)) +ℤ ((((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25))))))) +ℤ ((0x2 * (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))) +ℤ (0x10 *ℤ (((uint64_t)x9 * x34) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + (((uint64_t)x19 * x27) + ((uint64_t)x18 * x25)))))))))));
+{ ℤ x44 = (((uint64_t)x5 * x21) +ℤ (((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23))))))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23)))))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x7 * x34)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + (((uint64_t)x17 * x27) + ((0x2 * ((uint64_t)x19 * x25)) + (0x2 * ((uint64_t)x18 * x23))))))))))))); // lowest column: x5*x21 plus the wrapped products scaled by 1 + 2 + 16
+{ uint64_t x45 = (x36 >> 0x1c); // split the top column at 28 bits
+{ uint32_t x46 = ((uint32_t)x36 & 0xfffffff);
+{ uint64_t x47 = ((0x10000000 * x45) + x46); // recombines the two halves, so x47 has the same value as x36
+{ uint64_t x48 = (x47 >> 0x1c);
+{ uint32_t x49 = ((uint32_t)x47 & 0xfffffff);
+{ uint64_t x50 = ((0x10000000 * x48) + x49); // again equal to x36
+{ uint64_t x51 = (x50 >> 0x1c);
+{ uint32_t x52 = ((uint32_t)x50 & 0xfffffff);
+{ uint64_t x53 = ((0x10000000 * x51) + x52); // again equal to x36
+{ uint64_t x54 = (x53 >> 0x1c);
+{ uint32_t x55 = ((uint32_t)x53 & 0xfffffff);
+{ ℤ x56 = (x44 +ℤ (x54 + ((0x2 * x54) + (0x10 * x54)))); // folds the top column's high part (times 19) into the lowest column
+{ uint64_t x57 = (x56 >> 0x1d); // carry chain: the lowest word is 29 bits wide
+{ uint32_t x58 = (x56 & 0x1fffffff);
+{ ℤ x59 = (x57 +ℤ x43);
+{ uint64_t x60 = (x59 >> 0x1c);
+{ uint32_t x61 = (x59 & 0xfffffff);
+{ ℤ x62 = (x60 +ℤ x42);
+{ uint64_t x63 = (x62 >> 0x1c);
+{ uint32_t x64 = (x62 & 0xfffffff);
+{ ℤ x65 = (x63 +ℤ x41);
+{ uint64_t x66 = (x65 >> 0x1d);
+{ uint32_t x67 = (x65 & 0x1fffffff);
+{ ℤ x68 = (x66 +ℤ x40);
+{ uint64_t x69 = (x68 >> 0x1c);
+{ uint32_t x70 = (x68 & 0xfffffff);
+{ ℤ x71 = (x69 +ℤ x39);
+{ uint64_t x72 = (x71 >> 0x1c);
+{ uint32_t x73 = (x71 & 0xfffffff);
+{ ℤ x74 = (x72 +ℤ x38);
+{ uint64_t x75 = (x74 >> 0x1d);
+{ uint32_t x76 = (x74 & 0x1fffffff);
+{ ℤ x77 = (x75 +ℤ x37);
+{ uint64_t x78 = (x77 >> 0x1c);
+{ uint32_t x79 = (x77 & 0xfffffff);
+{ uint64_t x80 = (x78 + x55); // the low 28 bits of the top column rejoin the chain here
+{ uint32_t x81 = (uint32_t) (x80 >> 0x1c);
+{ uint32_t x82 = ((uint32_t)x80 & 0xfffffff);
+{ uint32_t x83 = (x58 + (x81 + ((0x2 * x81) + (0x10 * x81)))); // the final carry (times 19) is folded back into the lowest word
+{ uint32_t x84 = (x83 >> 0x1d);
+{ uint32_t x85 = (x83 & 0x1fffffff);
+{ uint32_t x86 = (x85 >> 0x1d); // x85 is already masked to 29 bits, so x86 is always 0
+{ uint32_t x87 = (x85 & 0x1fffffff);
+{ uint32_t x88 = (x87 >> 0x1d); // likewise always 0
+{ uint32_t x89 = (x87 & 0x1fffffff);
+out[0] = x82; // words are stored top word first; out[8] holds the lowest word
+out[1] = x79;
+out[2] = x76;
+out[3] = x73;
+out[4] = x70;
+out[5] = x67;
+out[6] = x64;
+out[7] = x88 + x86 + x84 + x61; // carries are added without re-masking, so this word may exceed 28 bits
+out[8] = x89;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/femul.h b/src/Specific/solinas32_2e255m2e4m2e1m1/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21); // Prototype for femul.c; per the "caller" note there, out must point to uint64_t[9]. NOTE(review): the committed definition contains non-C 'ℤ' tokens and will not compile as-is.
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c b/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c
new file mode 100644
index 000000000..6c2274f91
--- /dev/null
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.c
@@ -0,0 +1,84 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one nine-word operand (x2..x15) and stores nine words in out[0..8]; word masks are 29 or 28 bits. NOTE(review): the body uses the tokens 'ℤ', '+ℤ' and '*ℤ', which are not C, so this file cannot compile as committed - presumably the generator could not fit those intermediates into a machine word; confirm and regenerate.
+{ uint64_t x17 = (((uint64_t)x2 * x15) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x15 * x2))))))))); // top column: the only one typed uint64_t and the only one with no wrap-around terms
+{ ℤ x18 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) +ℤ (((uint64_t)x15 * x15) + ((0x2 * ((uint64_t)x15 * x15)) + (0x10 * ((uint64_t)x15 * x15))))); // wrap term has the form v + 2*v + 16*v, i.e. 19*v
+{ ℤ x19 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) +ℤ (((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16))) +ℤ ((0x2 * ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x16 * x15)) + (0x2 * ((uint64_t)x15 * x16)))))));
+{ ℤ x20 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) +ℤ ((((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14))) +ℤ ((0x2 * (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))) +ℤ (0x10 *ℤ (((uint64_t)x14 * x15) + ((0x2 * ((uint64_t)x16 * x16)) + ((uint64_t)x15 * x14)))))));
+{ ℤ x21 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ ((((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12)))) +ℤ ((0x2 * (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))) +ℤ (0x10 *ℤ (((uint64_t)x12 * x15) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + ((uint64_t)x15 * x12))))))));
+{ ℤ x22 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10)))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x10 * x15)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + (0x2 * ((uint64_t)x15 * x10))))))))));
+{ ℤ x23 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ ((((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8)))))) +ℤ ((0x2 *ℤ (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))) +ℤ (0x10 *ℤ (((uint64_t)x8 * x15) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((uint64_t)x15 * x8))))))))));
+{ ℤ x24 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ ((((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6))))))) +ℤ ((0x2 * (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))) +ℤ (0x10 *ℤ (((uint64_t)x6 * x15) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + ((uint64_t)x15 * x6)))))))))));
+{ ℤ x25 = (((uint64_t)x2 * x2) +ℤ (((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4))))))))) +ℤ ((0x2 *ℤ ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4)))))))))) +ℤ (0x10 *ℤ ((0x2 * ((uint64_t)x4 * x15)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + (0x2 * ((uint64_t)x15 * x4))))))))))))); // lowest column: x2*x2 plus the wrapped products scaled by 1 + 2 + 16
+{ uint64_t x26 = (x17 >> 0x1c); // split the top column at 28 bits
+{ uint32_t x27 = ((uint32_t)x17 & 0xfffffff);
+{ uint64_t x28 = ((0x10000000 * x26) + x27); // recombines the two halves, so x28 has the same value as x17
+{ uint64_t x29 = (x28 >> 0x1c);
+{ uint32_t x30 = ((uint32_t)x28 & 0xfffffff);
+{ uint64_t x31 = ((0x10000000 * x29) + x30); // again equal to x17
+{ uint64_t x32 = (x31 >> 0x1c);
+{ uint32_t x33 = ((uint32_t)x31 & 0xfffffff);
+{ uint64_t x34 = ((0x10000000 * x32) + x33); // again equal to x17
+{ uint64_t x35 = (x34 >> 0x1c);
+{ uint32_t x36 = ((uint32_t)x34 & 0xfffffff);
+{ ℤ x37 = (x25 +ℤ (x35 + ((0x2 * x35) + (0x10 * x35)))); // folds the top column's high part (times 19) into the lowest column
+{ uint64_t x38 = (x37 >> 0x1d); // carry chain: the lowest word is 29 bits wide
+{ uint32_t x39 = (x37 & 0x1fffffff);
+{ ℤ x40 = (x38 +ℤ x24);
+{ uint64_t x41 = (x40 >> 0x1c);
+{ uint32_t x42 = (x40 & 0xfffffff);
+{ ℤ x43 = (x41 +ℤ x23);
+{ uint64_t x44 = (x43 >> 0x1c);
+{ uint32_t x45 = (x43 & 0xfffffff);
+{ ℤ x46 = (x44 +ℤ x22);
+{ uint64_t x47 = (x46 >> 0x1d);
+{ uint32_t x48 = (x46 & 0x1fffffff);
+{ ℤ x49 = (x47 +ℤ x21);
+{ uint64_t x50 = (x49 >> 0x1c);
+{ uint32_t x51 = (x49 & 0xfffffff);
+{ ℤ x52 = (x50 +ℤ x20);
+{ uint64_t x53 = (x52 >> 0x1c);
+{ uint32_t x54 = (x52 & 0xfffffff);
+{ ℤ x55 = (x53 +ℤ x19);
+{ uint64_t x56 = (x55 >> 0x1d);
+{ uint32_t x57 = (x55 & 0x1fffffff);
+{ ℤ x58 = (x56 +ℤ x18);
+{ uint64_t x59 = (x58 >> 0x1c);
+{ uint32_t x60 = (x58 & 0xfffffff);
+{ uint64_t x61 = (x59 + x36); // the low 28 bits of the top column rejoin the chain here
+{ uint32_t x62 = (uint32_t) (x61 >> 0x1c);
+{ uint32_t x63 = ((uint32_t)x61 & 0xfffffff);
+{ uint32_t x64 = (x39 + (x62 + ((0x2 * x62) + (0x10 * x62)))); // the final carry (times 19) is folded back into the lowest word
+{ uint32_t x65 = (x64 >> 0x1d);
+{ uint32_t x66 = (x64 & 0x1fffffff);
+{ uint32_t x67 = (x66 >> 0x1d); // x66 is already masked to 29 bits, so x67 is always 0
+{ uint32_t x68 = (x66 & 0x1fffffff);
+{ uint32_t x69 = (x68 >> 0x1d); // likewise always 0
+{ uint32_t x70 = (x68 & 0x1fffffff);
+out[0] = x63; // words are stored top word first; out[8] holds the lowest word
+out[1] = x60;
+out[2] = x57;
+out[3] = x54;
+out[4] = x51;
+out[5] = x48;
+out[6] = x45;
+out[7] = x69 + x67 + x65 + x42; // carries are added without re-masking, so this word may exceed 28 bits
+out[8] = x70;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.h b/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare.c; per the "caller" note there, out must point to uint64_t[9]. NOTE(review): the committed definition contains non-C 'ℤ' tokens and will not compile as-is.
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c b/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c
new file mode 100644
index 000000000..1e8658d1b
--- /dev/null
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening brace and the statements are unrendered generator output, so this file cannot compile as written.
+out[0] = uint32_t x18; // NOTE(review): a declaration used as an expression; x18 is never given a value.
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 29 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): raw "Op Syntax.SubWithGetBorrow" text - presumably a subtract-with-borrow step the C printer failed to render; TODO confirm against the generator.
+out[2] = x2;
+out[3] = 0x1fffffed;; // stray second semicolon is in the generated output
+}
+// caller: uint64_t out[4]; NOTE(review): 4 outputs does not match the 9 limb arguments above - looks like an artifact of the failed rendering; confirm.
diff --git a/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.h b/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas32_2e255m2e4m2e1m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declares freeze: an output pointer followed by 9 limb arguments. NOTE(review): the matching freeze.c body in this patch is not valid C, so this prototype currently has no compilable definition.
diff --git a/src/Specific/solinas32_2e255m765/femul.c b/src/Specific/solinas32_2e255m765/femul.c
new file mode 100644
index 000000000..c302e2221
--- /dev/null
+++ b/src/Specific/solinas32_2e255m765/femul.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Multiplies two 12-limb operands (x24..x5 and x46..x27; x5 and x27 feed the lowest coefficient) and stores 12 carried limbs in out[0..11]. Presumably arithmetic mod 2^255 - 765, per the directory name - TODO confirm.
+{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))); // highest coefficient: no 0x2fd term. The 0x2 factors presumably compensate for the mixed 22/21-bit limb widths - TODO confirm.
+{ uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x2fd * ((uint64_t)x24 * x46))); // from here down each coefficient adds a second group of products scaled by 0x2fd (= 765)
+{ uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x2fd * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x2fd * ((0x2 * ((uint64_t)x23 * x46)) + ((0x2 * ((uint64_t)x25 * x47)) + (0x2 * ((uint64_t)x24 * x45))))));
+{ uint64_t x52 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x19 * x27)))))))) + (0x2fd * (((uint64_t)x21 * x46) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((uint64_t)x24 * x43))))));
+{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x2fd * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
+{ uint64_t x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) + (0x2fd * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39))))))));
+{ uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x2fd * ((0x2 * ((uint64_t)x15 * x46)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + (0x2 * ((uint64_t)x24 * x37))))))))));
+{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x2fd * (((uint64_t)x13 * x46) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x24 * x35))))))))));
+{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x2fd * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x2fd * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x2fd * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))); // lowest coefficient; the carry chain below starts here
+{ uint64_t x60 = (x59 >> 0x16); // carry out of the lowest coefficient (0x16 = 22 bits kept)
+{ uint32_t x61 = ((uint32_t)x59 & 0x3fffff); // low 22 bits of the lowest coefficient
+{ uint64_t x62 = (x60 + x58); // add the carry into the next coefficient; this pattern repeats upward through x48
+{ uint64_t x63 = (x62 >> 0x15); // 0x15 = 21 bits kept; widths run 22,21,21,21 three times from the bottom up
+{ uint32_t x64 = ((uint32_t)x62 & 0x1fffff);
+{ uint64_t x65 = (x63 + x57);
+{ uint64_t x66 = (x65 >> 0x15);
+{ uint32_t x67 = ((uint32_t)x65 & 0x1fffff);
+{ uint64_t x68 = (x66 + x56);
+{ uint64_t x69 = (x68 >> 0x15);
+{ uint32_t x70 = ((uint32_t)x68 & 0x1fffff);
+{ uint64_t x71 = (x69 + x55);
+{ uint64_t x72 = (x71 >> 0x16);
+{ uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
+{ uint64_t x74 = (x72 + x54);
+{ uint64_t x75 = (x74 >> 0x15);
+{ uint32_t x76 = ((uint32_t)x74 & 0x1fffff);
+{ uint64_t x77 = (x75 + x53);
+{ uint64_t x78 = (x77 >> 0x15);
+{ uint32_t x79 = ((uint32_t)x77 & 0x1fffff);
+{ uint64_t x80 = (x78 + x52);
+{ uint64_t x81 = (x80 >> 0x15);
+{ uint32_t x82 = ((uint32_t)x80 & 0x1fffff);
+{ uint64_t x83 = (x81 + x51);
+{ uint64_t x84 = (x83 >> 0x16);
+{ uint32_t x85 = ((uint32_t)x83 & 0x3fffff);
+{ uint64_t x86 = (x84 + x50);
+{ uint64_t x87 = (x86 >> 0x15);
+{ uint32_t x88 = ((uint32_t)x86 & 0x1fffff);
+{ uint64_t x89 = (x87 + x49);
+{ uint64_t x90 = (x89 >> 0x15);
+{ uint32_t x91 = ((uint32_t)x89 & 0x1fffff);
+{ uint64_t x92 = (x90 + x48);
+{ uint32_t x93 = (uint32_t) (x92 >> 0x15); // carry out of the highest coefficient
+{ uint32_t x94 = ((uint32_t)x92 & 0x1fffff);
+{ uint64_t x95 = (x61 + ((uint64_t)0x2fd * x93)); // fold the top carry back into the lowest limb, scaled by 0x2fd
+{ uint32_t x96 = (uint32_t) (x95 >> 0x16);
+{ uint32_t x97 = ((uint32_t)x95 & 0x3fffff);
+{ uint32_t x98 = (x96 + x64); // push the fold-back carry into the second-lowest limb
+{ uint32_t x99 = (x98 >> 0x15);
+{ uint32_t x100 = (x98 & 0x1fffff);
+out[0] = x94; // limb derived from the highest coefficient (x48) is stored first
+out[1] = x91;
+out[2] = x88;
+out[3] = x85;
+out[4] = x82;
+out[5] = x79;
+out[6] = x76;
+out[7] = x73;
+out[8] = x70;
+out[9] = x99 + x67; // last carry is added without another mask, so this limb may exceed 21 bits
+out[10] = x100;
+out[11] = x97; // limb derived from the lowest coefficient (x59) is stored last
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12]; (out[0] holds the highest limb, out[11] the lowest)
diff --git a/src/Specific/solinas32_2e255m765/femul.h b/src/Specific/solinas32_2e255m765/femul.h
new file mode 100644
index 000000000..21cfc1a1b
--- /dev/null
+++ b/src/Specific/solinas32_2e255m765/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Declares femul: an output pointer followed by two groups of 12 limb arguments. NOTE(review): always_inline on a declaration whose body lives in femul.c cannot be inlined by includers - confirm how callers consume this header.
diff --git a/src/Specific/solinas32_2e255m765/fesquare.c b/src/Specific/solinas32_2e255m765/fesquare.c
new file mode 100644
index 000000000..a4ae3fa15
--- /dev/null
+++ b/src/Specific/solinas32_2e255m765/fesquare.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 12-limb operand (x21..x2; x2 feeds the lowest coefficient) and stores 12 carried limbs in out[0..11]. Presumably arithmetic mod 2^255 - 765, per the directory name - TODO confirm.
+{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))); // highest coefficient: no 0x2fd term. Each cross product is written out twice (a*b and b*a) rather than merged.
+{ uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x2fd * ((uint64_t)x21 * x21))); // from here down each coefficient adds a second group of products scaled by 0x2fd (= 765)
+{ uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x2fd * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x2fd * ((0x2 * ((uint64_t)x20 * x21)) + ((0x2 * ((uint64_t)x22 * x22)) + (0x2 * ((uint64_t)x21 * x20))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x2fd * (((uint64_t)x18 * x21) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + ((uint64_t)x21 * x18))))));
+{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x2fd * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
+{ uint64_t x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x2fd * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14))))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x2fd * ((0x2 * ((uint64_t)x12 * x21)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + (0x2 * ((uint64_t)x21 * x12))))))))));
+{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x2fd * (((uint64_t)x10 * x21) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((uint64_t)x21 * x10))))))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x2fd * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x2fd * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x2fd * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))); // lowest coefficient; the carry chain below starts here
+{ uint64_t x35 = (x34 >> 0x16); // carry out of the lowest coefficient (0x16 = 22 bits kept)
+{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff); // low 22 bits of the lowest coefficient
+{ uint64_t x37 = (x35 + x33); // add the carry into the next coefficient; this pattern repeats upward through x23
+{ uint64_t x38 = (x37 >> 0x15); // 0x15 = 21 bits kept; widths run 22,21,21,21 three times from the bottom up
+{ uint32_t x39 = ((uint32_t)x37 & 0x1fffff);
+{ uint64_t x40 = (x38 + x32);
+{ uint64_t x41 = (x40 >> 0x15);
+{ uint32_t x42 = ((uint32_t)x40 & 0x1fffff);
+{ uint64_t x43 = (x41 + x31);
+{ uint64_t x44 = (x43 >> 0x15);
+{ uint32_t x45 = ((uint32_t)x43 & 0x1fffff);
+{ uint64_t x46 = (x44 + x30);
+{ uint64_t x47 = (x46 >> 0x16);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+{ uint64_t x49 = (x47 + x29);
+{ uint64_t x50 = (x49 >> 0x15);
+{ uint32_t x51 = ((uint32_t)x49 & 0x1fffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint64_t x53 = (x52 >> 0x15);
+{ uint32_t x54 = ((uint32_t)x52 & 0x1fffff);
+{ uint64_t x55 = (x53 + x27);
+{ uint64_t x56 = (x55 >> 0x15);
+{ uint32_t x57 = ((uint32_t)x55 & 0x1fffff);
+{ uint64_t x58 = (x56 + x26);
+{ uint64_t x59 = (x58 >> 0x16);
+{ uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
+{ uint64_t x61 = (x59 + x25);
+{ uint64_t x62 = (x61 >> 0x15);
+{ uint32_t x63 = ((uint32_t)x61 & 0x1fffff);
+{ uint64_t x64 = (x62 + x24);
+{ uint64_t x65 = (x64 >> 0x15);
+{ uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
+{ uint64_t x67 = (x65 + x23);
+{ uint32_t x68 = (uint32_t) (x67 >> 0x15); // carry out of the highest coefficient
+{ uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
+{ uint64_t x70 = (x36 + ((uint64_t)0x2fd * x68)); // fold the top carry back into the lowest limb, scaled by 0x2fd
+{ uint32_t x71 = (uint32_t) (x70 >> 0x16);
+{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+{ uint32_t x73 = (x71 + x39); // push the fold-back carry into the second-lowest limb
+{ uint32_t x74 = (x73 >> 0x15);
+{ uint32_t x75 = (x73 & 0x1fffff);
+out[0] = x69; // limb derived from the highest coefficient (x23) is stored first
+out[1] = x66;
+out[2] = x63;
+out[3] = x60;
+out[4] = x57;
+out[5] = x54;
+out[6] = x51;
+out[7] = x48;
+out[8] = x45;
+out[9] = x74 + x42; // last carry is added without another mask, so this limb may exceed 21 bits
+out[10] = x75;
+out[11] = x72; // limb derived from the lowest coefficient (x34) is stored last
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12]; (out[0] holds the highest limb, out[11] the lowest)
diff --git a/src/Specific/solinas32_2e255m765/fesquare.h b/src/Specific/solinas32_2e255m765/fesquare.h
new file mode 100644
index 000000000..5d10fa419
--- /dev/null
+++ b/src/Specific/solinas32_2e255m765/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declares fesquare: an output pointer followed by 12 limb arguments. NOTE(review): always_inline on a declaration whose body lives in fesquare.c cannot be inlined by includers - confirm how callers consume this header.
diff --git a/src/Specific/solinas32_2e255m765/freeze.c b/src/Specific/solinas32_2e255m765/freeze.c
new file mode 100644
index 000000000..4fb73d4cc
--- /dev/null
+++ b/src/Specific/solinas32_2e255m765/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening brace and the statements are unrendered generator output, so this file cannot compile as written.
+out[0] = uint32_t x24; // NOTE(review): a declaration used as an expression; x24 is never given a value.
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): raw "Op Syntax.SubWithGetBorrow" text - presumably a subtract-with-borrow step the C printer failed to render; TODO confirm against the generator.
+out[2] = x2;
+out[3] = 0x3ffd03;; // stray second semicolon is in the generated output
+}
+// caller: uint64_t out[4]; NOTE(review): 4 outputs does not match the 12 limb arguments above - looks like an artifact of the failed rendering; confirm.
diff --git a/src/Specific/solinas32_2e255m765/freeze.h b/src/Specific/solinas32_2e255m765/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e255m765/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declares freeze: an output pointer followed by 12 limb arguments. NOTE(review): the matching freeze.c body in this patch is not valid C, so this prototype currently has no compilable definition.
diff --git a/src/Specific/solinas32_2e256m189/femul.c b/src/Specific/solinas32_2e256m189/femul.c
new file mode 100644
index 000000000..3c9d8ac20
--- /dev/null
+++ b/src/Specific/solinas32_2e256m189/femul.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Multiplies two 12-limb operands (x24..x5 and x46..x27; x5 and x27 feed the lowest coefficient) and stores 12 carried limbs in out[0..11]. Presumably arithmetic mod 2^256 - 189, per the directory name - TODO confirm.
+{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))); // highest coefficient: no 0xbd term. The 0x2 factors presumably compensate for the mixed 22/21-bit limb widths - TODO confirm.
+{ uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + (0xbd * ((uint64_t)x24 * x46))); // from here down each coefficient adds a second group of products scaled by 0xbd (= 189)
+{ uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0xbd * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47)))));
+{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0xbd * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45)))));
+{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0xbd * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
+{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0xbd * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
+{ uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0xbd * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
+{ uint64_t x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) + (0xbd * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
+{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0xbd * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35)))))))))));
+{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0xbd * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33)))))))))));
+{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0xbd * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+{ uint64_t x59 = (((uint64_t)x5 * x27) + (0xbd * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))); // lowest coefficient; the carry chain below starts here
+{ uint64_t x60 = (x59 >> 0x16); // carry out of the lowest coefficient (0x16 = 22 bits kept)
+{ uint32_t x61 = ((uint32_t)x59 & 0x3fffff); // low 22 bits of the lowest coefficient
+{ uint64_t x62 = (x60 + x58); // add the carry into the next coefficient; this pattern repeats upward through x48
+{ uint64_t x63 = (x62 >> 0x15); // 0x15 = 21 bits kept; widths run 22,21,21 four times from the bottom up
+{ uint32_t x64 = ((uint32_t)x62 & 0x1fffff);
+{ uint64_t x65 = (x63 + x57);
+{ uint64_t x66 = (x65 >> 0x15);
+{ uint32_t x67 = ((uint32_t)x65 & 0x1fffff);
+{ uint64_t x68 = (x66 + x56);
+{ uint64_t x69 = (x68 >> 0x16);
+{ uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
+{ uint64_t x71 = (x69 + x55);
+{ uint64_t x72 = (x71 >> 0x15);
+{ uint32_t x73 = ((uint32_t)x71 & 0x1fffff);
+{ uint64_t x74 = (x72 + x54);
+{ uint64_t x75 = (x74 >> 0x15);
+{ uint32_t x76 = ((uint32_t)x74 & 0x1fffff);
+{ uint64_t x77 = (x75 + x53);
+{ uint64_t x78 = (x77 >> 0x16);
+{ uint32_t x79 = ((uint32_t)x77 & 0x3fffff);
+{ uint64_t x80 = (x78 + x52);
+{ uint64_t x81 = (x80 >> 0x15);
+{ uint32_t x82 = ((uint32_t)x80 & 0x1fffff);
+{ uint64_t x83 = (x81 + x51);
+{ uint64_t x84 = (x83 >> 0x15);
+{ uint32_t x85 = ((uint32_t)x83 & 0x1fffff);
+{ uint64_t x86 = (x84 + x50);
+{ uint64_t x87 = (x86 >> 0x16);
+{ uint32_t x88 = ((uint32_t)x86 & 0x3fffff);
+{ uint64_t x89 = (x87 + x49);
+{ uint64_t x90 = (x89 >> 0x15);
+{ uint32_t x91 = ((uint32_t)x89 & 0x1fffff);
+{ uint64_t x92 = (x90 + x48);
+{ uint32_t x93 = (uint32_t) (x92 >> 0x15); // carry out of the highest coefficient
+{ uint32_t x94 = ((uint32_t)x92 & 0x1fffff);
+{ uint64_t x95 = (x61 + ((uint64_t)0xbd * x93)); // fold the top carry back into the lowest limb, scaled by 0xbd
+{ uint32_t x96 = (uint32_t) (x95 >> 0x16);
+{ uint32_t x97 = ((uint32_t)x95 & 0x3fffff);
+{ uint32_t x98 = (x96 + x64); // push the fold-back carry into the second-lowest limb
+{ uint32_t x99 = (x98 >> 0x15);
+{ uint32_t x100 = (x98 & 0x1fffff);
+out[0] = x94; // limb derived from the highest coefficient (x48) is stored first
+out[1] = x91;
+out[2] = x88;
+out[3] = x85;
+out[4] = x82;
+out[5] = x79;
+out[6] = x76;
+out[7] = x73;
+out[8] = x70;
+out[9] = x99 + x67; // last carry is added without another mask, so this limb may exceed 21 bits
+out[10] = x100;
+out[11] = x97; // limb derived from the lowest coefficient (x59) is stored last
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12]; (out[0] holds the highest limb, out[11] the lowest)
diff --git a/src/Specific/solinas32_2e256m189/femul.h b/src/Specific/solinas32_2e256m189/femul.h
new file mode 100644
index 000000000..21cfc1a1b
--- /dev/null
+++ b/src/Specific/solinas32_2e256m189/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Declares femul: an output pointer followed by two groups of 12 limb arguments. NOTE(review): always_inline on a declaration whose body lives in femul.c cannot be inlined by includers - confirm how callers consume this header.
diff --git a/src/Specific/solinas32_2e256m189/fesquare.c b/src/Specific/solinas32_2e256m189/fesquare.c
new file mode 100644
index 000000000..5538791c9
--- /dev/null
+++ b/src/Specific/solinas32_2e256m189/fesquare.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes 12 words to out[0..11]. Each column below is a sum of pairwise products of the inputs; products that overflow the top column are folded back scaled by 0xbd. Inputs run from x21 (highest-weight word) down to x2 (lowest), as the product index pattern shows. NOTE(review): presumably squaring mod 2^256 - 189 (0xbd == 189, cf. directory name) -- confirm against the generator.
+{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))); // x23: highest-weight column; the only one without a 0xbd wrap-around term
+{ uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0xbd * ((uint64_t)x21 * x21)));
+{ uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0xbd * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22)))));
+{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0xbd * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20)))));
+{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0xbd * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
+{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0xbd * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
+{ uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0xbd * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xbd * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
+{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0xbd * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10)))))))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0xbd * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8)))))))))));
+{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xbd * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+{ uint64_t x34 = (((uint64_t)x2 * x2) + (0xbd * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))); // x34: lowest-weight column
+{ uint64_t x35 = (x34 >> 0x16); // carry chain, lowest column first: shifts repeat 0x16, 0x15, 0x15 (22+21+21 bits) and each mask keeps the low 22 or 21 bits of its column
+{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+{ uint64_t x37 = (x35 + x33);
+{ uint64_t x38 = (x37 >> 0x15);
+{ uint32_t x39 = ((uint32_t)x37 & 0x1fffff);
+{ uint64_t x40 = (x38 + x32);
+{ uint64_t x41 = (x40 >> 0x15);
+{ uint32_t x42 = ((uint32_t)x40 & 0x1fffff);
+{ uint64_t x43 = (x41 + x31);
+{ uint64_t x44 = (x43 >> 0x16);
+{ uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
+{ uint64_t x46 = (x44 + x30);
+{ uint64_t x47 = (x46 >> 0x15);
+{ uint32_t x48 = ((uint32_t)x46 & 0x1fffff);
+{ uint64_t x49 = (x47 + x29);
+{ uint64_t x50 = (x49 >> 0x15);
+{ uint32_t x51 = ((uint32_t)x49 & 0x1fffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint64_t x53 = (x52 >> 0x16);
+{ uint32_t x54 = ((uint32_t)x52 & 0x3fffff);
+{ uint64_t x55 = (x53 + x27);
+{ uint64_t x56 = (x55 >> 0x15);
+{ uint32_t x57 = ((uint32_t)x55 & 0x1fffff);
+{ uint64_t x58 = (x56 + x26);
+{ uint64_t x59 = (x58 >> 0x15);
+{ uint32_t x60 = ((uint32_t)x58 & 0x1fffff);
+{ uint64_t x61 = (x59 + x25);
+{ uint64_t x62 = (x61 >> 0x16);
+{ uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
+{ uint64_t x64 = (x62 + x24);
+{ uint64_t x65 = (x64 >> 0x15);
+{ uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
+{ uint64_t x67 = (x65 + x23);
+{ uint32_t x68 = (uint32_t) (x67 >> 0x15); // x68: carry out of the highest column
+{ uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
+{ uint64_t x70 = (x36 + ((uint64_t)0xbd * x68)); // fold that carry back into the lowest word, scaled by 0xbd
+{ uint32_t x71 = (uint32_t) (x70 >> 0x16);
+{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+{ uint32_t x73 = (x71 + x39);
+{ uint32_t x74 = (x73 >> 0x15);
+{ uint32_t x75 = (x73 & 0x1fffff);
+out[0] = x69; // out[0] receives the highest-weight word, out[11] the lowest
+out[1] = x66;
+out[2] = x63;
+out[3] = x60;
+out[4] = x57;
+out[5] = x54;
+out[6] = x51;
+out[7] = x48;
+out[8] = x45;
+out[9] = x74 + x42; // x74 (carry left over from the fold above) is added without masking again
+out[10] = x75;
+out[11] = x72;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e256m189/fesquare.h b/src/Specific/solinas32_2e256m189/fesquare.h
new file mode 100644
index 000000000..5d10fa419
--- /dev/null
+++ b/src/Specific/solinas32_2e256m189/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Defined in fesquare.c, which writes out[0] through out[11] and carries the note "caller: uint64_t out[12]"; x21..x2 are the 12 input words.
diff --git a/src/Specific/solinas32_2e256m189/freeze.c b/src/Specific/solinas32_2e256m189/freeze.c
new file mode 100644
index 000000000..f0233bbaa
--- /dev/null
+++ b/src/Specific/solinas32_2e256m189/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace after the signature, a declaration used as an expression, raw "Op Syntax..." text); it looks like unrendered generator output -- TODO regenerate before building this file.
+out[0] = uint32_t x24; // NOTE(review): "uint32_t x24" is a declaration, not an expression; x24 is never given a value
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not a C expression
+out[2] = x2;
+out[3] = 0x3fff43;; // 0x3fff43 == 0x400000 - 0xbd; the second ';' is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e256m189/freeze.h b/src/Specific/solinas32_2e256m189/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e256m189/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Defined in freeze.c, whose trailing note says "caller: uint64_t out[4]". NOTE(review): the freeze.c body in this commit is not valid C -- confirm the intended size of out once it is regenerated.
diff --git a/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c b/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c
new file mode 100644
index 000000000..831304167
--- /dev/null
+++ b/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace after the signature, a declaration used as an expression, raw "Op Syntax..." text); it looks like unrendered generator output -- TODO regenerate before building this file.
+out[0] = uint32_t x24; // NOTE(review): "uint32_t x24" is a declaration, not an expression; x24 is never given a value
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not a C expression
+out[2] = x2;
+out[3] = 0x3fffff;; // 0x3fffff == 0x400000 - 1; the second ';' is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.h b/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e256m2e224p2e192p2e96m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Defined in freeze.c, whose trailing note says "caller: uint64_t out[4]". NOTE(review): the freeze.c body in this commit is not valid C -- confirm the intended size of out once it is regenerated.
diff --git a/src/Specific/solinas32_2e256m2e32m977/femul.c b/src/Specific/solinas32_2e256m2e32m977/femul.c
new file mode 100644
index 000000000..e4222a9ab
--- /dev/null
+++ b/src/Specific/solinas32_2e256m2e32m977/femul.c
@@ -0,0 +1,97 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Writes 12 words to out[0..11] from two 12-word operands, x24,x25,x23,...,x5 and x46,x47,x45,...,x27 (each listed highest-weight word first, as the product index pattern shows). Products that overflow the top column are folded back scaled by 0x3d1 and by 0x400/0x800. NOTE(review): presumably multiplication mod 2^256 - 2^32 - 977 (0x3d1 == 977, cf. directory name) -- confirm against the generator.
+{ uint64_t x48 = ((((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))) + (0x800 * ((uint64_t)x24 * x46))); // x48: highest-weight column
+{ uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + ((0x3d1 * ((uint64_t)x24 * x46)) + (0x400 * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47))))));
+{ uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + ((0x2 * ((uint64_t)x13 * x37)) + ((0x2 * ((uint64_t)x15 * x35)) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x25 * x46)) + (0x2 * ((uint64_t)x24 * x47)))) + (0x800 * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45))))));
+{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((0x2 * ((uint64_t)x13 * x35)) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + ((0x3d1 * (((uint64_t)x23 * x46) + ((0x2 * ((uint64_t)x25 * x47)) + ((uint64_t)x24 * x45)))) + (0x800 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43)))))));
+{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + ((0x3d1 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))) + (0x400 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41)))))))));
+{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + (((uint64_t)x11 * x33) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + (((uint64_t)x23 * x45) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))) + (0x800 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39)))))))));
+{ uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + ((0x3d1 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))) + (0x800 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37))))))))));
+{ uint64_t x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) + ((0x3d1 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))) + (0x400 * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35))))))))))));
+{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + ((0x3d1 * ((0x2 * ((uint64_t)x13 * x46)) + ((0x2 * ((uint64_t)x15 * x47)) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + ((0x2 * ((uint64_t)x25 * x37)) + (0x2 * ((uint64_t)x24 * x35)))))))))) + (0x800 * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33))))))))))));
+{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + ((0x3d1 * (((uint64_t)x11 * x46) + ((0x2 * ((uint64_t)x13 * x47)) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + ((0x2 * ((uint64_t)x25 * x35)) + ((uint64_t)x24 * x33)))))))))) + (0x800 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31)))))))))))));
+{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + ((0x3d1 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))) + (0x400 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))));
+{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x3d1 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + (((uint64_t)x11 * x45) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + (((uint64_t)x23 * x33) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))); // x59: lowest-weight column
+{ uint64_t x60 = (x59 >> 0x16); // split the lowest column into its low 22 bits (x61) and the carry above them (x60)
+{ uint32_t x61 = ((uint32_t)x59 & 0x3fffff);
+{ uint64_t x62 = (x48 >> 0x15); // split the highest column x48 at bit 21
+{ uint32_t x63 = ((uint32_t)x48 & 0x1fffff);
+{ uint64_t x64 = ((0x200000 * x62) + x63); // 0x200000 == 1 << 0x15, so this puts x62 and x63 back together
+{ uint64_t x65 = (x64 >> 0x15);
+{ uint32_t x66 = ((uint32_t)x64 & 0x1fffff);
+{ uint64_t x67 = ((x60 + x58) + (0x400 * x65)); // the top carry x65 is folded into the second-lowest column scaled by 0x400 ...
+{ uint64_t x68 = (x67 >> 0x15);
+{ uint32_t x69 = ((uint32_t)x67 & 0x1fffff);
+{ uint64_t x70 = (x61 + (0x3d1 * x65)); // ... and into the lowest word scaled by 0x3d1
+{ uint32_t x71 = (uint32_t) (x70 >> 0x16);
+{ uint32_t x72 = ((uint32_t)x70 & 0x3fffff);
+{ uint64_t x73 = (x68 + x57); // from here the carry runs upward through the remaining columns with shifts of 0x15 or 0x16 bits
+{ uint64_t x74 = (x73 >> 0x15);
+{ uint32_t x75 = ((uint32_t)x73 & 0x1fffff);
+{ uint64_t x76 = (x74 + x56);
+{ uint64_t x77 = (x76 >> 0x16);
+{ uint32_t x78 = ((uint32_t)x76 & 0x3fffff);
+{ uint64_t x79 = (x77 + x55);
+{ uint64_t x80 = (x79 >> 0x15);
+{ uint32_t x81 = ((uint32_t)x79 & 0x1fffff);
+{ uint64_t x82 = (x80 + x54);
+{ uint64_t x83 = (x82 >> 0x15);
+{ uint32_t x84 = ((uint32_t)x82 & 0x1fffff);
+{ uint64_t x85 = (x83 + x53);
+{ uint64_t x86 = (x85 >> 0x16);
+{ uint32_t x87 = ((uint32_t)x85 & 0x3fffff);
+{ uint64_t x88 = (x86 + x52);
+{ uint64_t x89 = (x88 >> 0x15);
+{ uint32_t x90 = ((uint32_t)x88 & 0x1fffff);
+{ uint64_t x91 = (x89 + x51);
+{ uint64_t x92 = (x91 >> 0x15);
+{ uint32_t x93 = ((uint32_t)x91 & 0x1fffff);
+{ uint64_t x94 = (x92 + x50);
+{ uint64_t x95 = (x94 >> 0x16);
+{ uint32_t x96 = ((uint32_t)x94 & 0x3fffff);
+{ uint64_t x97 = (x95 + x49);
+{ uint64_t x98 = (x97 >> 0x15);
+{ uint32_t x99 = ((uint32_t)x97 & 0x1fffff);
+{ uint64_t x100 = (x98 + x66); // top word: incoming carry plus the low 21 bits kept from x48
+{ uint32_t x101 = (uint32_t) (x100 >> 0x15);
+{ uint32_t x102 = ((uint32_t)x100 & 0x1fffff);
+{ uint64_t x103 = (((uint64_t)0x200000 * x101) + x102);
+{ uint32_t x104 = (uint32_t) (x103 >> 0x15);
+{ uint32_t x105 = ((uint32_t)x103 & 0x1fffff);
+{ uint32_t x106 = ((x71 + x69) + (0x400 * x104)); // second fold: the remaining top carry x104 gets the same 0x400 / 0x3d1 scaling as x65 above
+{ uint32_t x107 = (x106 >> 0x15);
+{ uint32_t x108 = (x106 & 0x1fffff);
+{ uint32_t x109 = (x72 + (0x3d1 * x104));
+{ uint32_t x110 = (x109 >> 0x16);
+{ uint32_t x111 = (x109 & 0x3fffff);
+out[0] = x105; // out[0] receives the highest-weight word, out[11] the lowest
+out[1] = x99;
+out[2] = x96;
+out[3] = x93;
+out[4] = x90;
+out[5] = x87;
+out[6] = x84;
+out[7] = x81;
+out[8] = x78;
+out[9] = x107 + x75; // carry x107 from the second fold is added without masking again
+out[10] = x110 + x108; // likewise for carry x110
+out[11] = x111;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e256m2e32m977/femul.h b/src/Specific/solinas32_2e256m2e32m977/femul.h
new file mode 100644
index 000000000..21cfc1a1b
--- /dev/null
+++ b/src/Specific/solinas32_2e256m2e32m977/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Defined in femul.c, which writes out[0] through out[11] and carries the note "caller: uint64_t out[12]"; the 24 value arguments are two groups of 12 words (x24..x5, then x46..x27).
diff --git a/src/Specific/solinas32_2e256m2e32m977/fesquare.c b/src/Specific/solinas32_2e256m2e32m977/fesquare.c
new file mode 100644
index 000000000..bc8569a5f
--- /dev/null
+++ b/src/Specific/solinas32_2e256m2e32m977/fesquare.c
@@ -0,0 +1,97 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Writes 12 words to out[0..11]. Each column below is a sum of pairwise products of the inputs; products that overflow the top column are folded back scaled by 0x3d1 and by 0x400/0x800. Inputs run from x21 (highest-weight word) down to x2 (lowest), as the product index pattern shows. NOTE(review): presumably squaring mod 2^256 - 2^32 - 977 (0x3d1 == 977, cf. directory name) -- confirm against the generator.
+{ uint64_t x23 = ((((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))) + (0x800 * ((uint64_t)x21 * x21))); // x23: highest-weight column
+{ uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + ((0x3d1 * ((uint64_t)x21 * x21)) + (0x400 * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22))))));
+{ uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x22 * x21)) + (0x2 * ((uint64_t)x21 * x22)))) + (0x800 * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20))))));
+{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + ((0x2 * ((uint64_t)x10 * x10)) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + ((0x3d1 * (((uint64_t)x20 * x21) + ((0x2 * ((uint64_t)x22 * x22)) + ((uint64_t)x21 * x20)))) + (0x800 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18)))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + ((0x3d1 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))) + (0x400 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16)))))))));
+{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + (((uint64_t)x8 * x8) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + ((0x3d1 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + (((uint64_t)x20 * x20) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))) + (0x800 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14)))))))));
+{ uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + ((0x3d1 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))) + (0x800 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12))))))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + ((0x3d1 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))) + (0x400 * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10))))))))))));
+{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + ((0x3d1 * ((0x2 * ((uint64_t)x10 * x21)) + ((0x2 * ((uint64_t)x12 * x22)) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + ((0x2 * ((uint64_t)x22 * x12)) + (0x2 * ((uint64_t)x21 * x10)))))))))) + (0x800 * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8))))))))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + ((0x3d1 * (((uint64_t)x8 * x21) + ((0x2 * ((uint64_t)x10 * x22)) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + ((0x2 * ((uint64_t)x22 * x10)) + ((uint64_t)x21 * x8)))))))))) + (0x800 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6)))))))))))));
+{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + ((0x3d1 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))) + (0x400 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))));
+{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x3d1 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + (((uint64_t)x8 * x20) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + (((uint64_t)x20 * x8) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))); // x34: lowest-weight column
+{ uint64_t x35 = (x34 >> 0x16); // split the lowest column into its low 22 bits (x36) and the carry above them (x35)
+{ uint32_t x36 = ((uint32_t)x34 & 0x3fffff);
+{ uint64_t x37 = (x23 >> 0x15); // split the highest column x23 at bit 21
+{ uint32_t x38 = ((uint32_t)x23 & 0x1fffff);
+{ uint64_t x39 = ((0x200000 * x37) + x38); // 0x200000 == 1 << 0x15, so this puts x37 and x38 back together
+{ uint64_t x40 = (x39 >> 0x15);
+{ uint32_t x41 = ((uint32_t)x39 & 0x1fffff);
+{ uint64_t x42 = ((x35 + x33) + (0x400 * x40)); // the top carry x40 is folded into the second-lowest column scaled by 0x400 ...
+{ uint64_t x43 = (x42 >> 0x15);
+{ uint32_t x44 = ((uint32_t)x42 & 0x1fffff);
+{ uint64_t x45 = (x36 + (0x3d1 * x40)); // ... and into the lowest word scaled by 0x3d1
+{ uint32_t x46 = (uint32_t) (x45 >> 0x16);
+{ uint32_t x47 = ((uint32_t)x45 & 0x3fffff);
+{ uint64_t x48 = (x43 + x32); // from here the carry runs upward through the remaining columns with shifts of 0x15 or 0x16 bits
+{ uint64_t x49 = (x48 >> 0x15);
+{ uint32_t x50 = ((uint32_t)x48 & 0x1fffff);
+{ uint64_t x51 = (x49 + x31);
+{ uint64_t x52 = (x51 >> 0x16);
+{ uint32_t x53 = ((uint32_t)x51 & 0x3fffff);
+{ uint64_t x54 = (x52 + x30);
+{ uint64_t x55 = (x54 >> 0x15);
+{ uint32_t x56 = ((uint32_t)x54 & 0x1fffff);
+{ uint64_t x57 = (x55 + x29);
+{ uint64_t x58 = (x57 >> 0x15);
+{ uint32_t x59 = ((uint32_t)x57 & 0x1fffff);
+{ uint64_t x60 = (x58 + x28);
+{ uint64_t x61 = (x60 >> 0x16);
+{ uint32_t x62 = ((uint32_t)x60 & 0x3fffff);
+{ uint64_t x63 = (x61 + x27);
+{ uint64_t x64 = (x63 >> 0x15);
+{ uint32_t x65 = ((uint32_t)x63 & 0x1fffff);
+{ uint64_t x66 = (x64 + x26);
+{ uint64_t x67 = (x66 >> 0x15);
+{ uint32_t x68 = ((uint32_t)x66 & 0x1fffff);
+{ uint64_t x69 = (x67 + x25);
+{ uint64_t x70 = (x69 >> 0x16);
+{ uint32_t x71 = ((uint32_t)x69 & 0x3fffff);
+{ uint64_t x72 = (x70 + x24);
+{ uint64_t x73 = (x72 >> 0x15);
+{ uint32_t x74 = ((uint32_t)x72 & 0x1fffff);
+{ uint64_t x75 = (x73 + x41); // top word: incoming carry plus the low 21 bits kept from x23
+{ uint32_t x76 = (uint32_t) (x75 >> 0x15);
+{ uint32_t x77 = ((uint32_t)x75 & 0x1fffff);
+{ uint64_t x78 = (((uint64_t)0x200000 * x76) + x77);
+{ uint32_t x79 = (uint32_t) (x78 >> 0x15);
+{ uint32_t x80 = ((uint32_t)x78 & 0x1fffff);
+{ uint32_t x81 = ((x46 + x44) + (0x400 * x79)); // second fold: the remaining top carry x79 gets the same 0x400 / 0x3d1 scaling as x40 above
+{ uint32_t x82 = (x81 >> 0x15);
+{ uint32_t x83 = (x81 & 0x1fffff);
+{ uint32_t x84 = (x47 + (0x3d1 * x79));
+{ uint32_t x85 = (x84 >> 0x16);
+{ uint32_t x86 = (x84 & 0x3fffff);
+out[0] = x80; // out[0] receives the highest-weight word, out[11] the lowest
+out[1] = x74;
+out[2] = x71;
+out[3] = x68;
+out[4] = x65;
+out[5] = x62;
+out[6] = x59;
+out[7] = x56;
+out[8] = x53;
+out[9] = x82 + x50; // carry x82 from the second fold is added without masking again
+out[10] = x85 + x83; // likewise for carry x85
+out[11] = x86;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e256m2e32m977/fesquare.h b/src/Specific/solinas32_2e256m2e32m977/fesquare.h
new file mode 100644
index 000000000..5d10fa419
--- /dev/null
+++ b/src/Specific/solinas32_2e256m2e32m977/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Defined in fesquare.c, which writes out[0] through out[11] and carries the note "caller: uint64_t out[12]"; x21..x2 are the 12 input words.
diff --git a/src/Specific/solinas32_2e256m2e32m977/freeze.c b/src/Specific/solinas32_2e256m2e32m977/freeze.c
new file mode 100644
index 000000000..816df7d43
--- /dev/null
+++ b/src/Specific/solinas32_2e256m2e32m977/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // unsigned integer in GCC's TI machine mode (16 bytes per the GCC machine-mode docs)
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only (clang and ICC excluded)
+// Presumably a workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 : alias the subborrow intrinsic names to GCC builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // discard the definition pulled in by the header included above before redefining
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature, so the definition does not parse as C; of the 12 operands only x2 is referenced below
+out[0] = uint32_t x24; // NOTE(review): a declaration used as an expression -- not valid C
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted generator term (SubWithGetBorrow) leaked into the C output -- needs regeneration, cannot be repaired by hand from what is shown here
+out[2] = x2;
+out[3] = 0x3ffc2f;; // the second ';' is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e256m2e32m977/freeze.h b/src/Specific/solinas32_2e256m2e32m977/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e256m2e32m977/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition so the macro below always wins
+#define force_inline __attribute__((always_inline)) // GNU-style attribute syntax
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only: output pointer followed by 12 uint64_t operands passed by value
diff --git a/src/Specific/solinas32_2e256m88x2e240m1/freeze.c b/src/Specific/solinas32_2e256m88x2e240m1/freeze.c
new file mode 100644
index 000000000..831304167
--- /dev/null
+++ b/src/Specific/solinas32_2e256m88x2e240m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // unsigned integer in GCC's TI machine mode (16 bytes per the GCC machine-mode docs)
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only (clang and ICC excluded)
+// Presumably a workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 : alias the subborrow intrinsic names to GCC builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // discard the definition pulled in by the header included above before redefining
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature, so the definition does not parse as C; of the 12 operands only x2 is referenced below
+out[0] = uint32_t x24; // NOTE(review): a declaration used as an expression -- not valid C
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 22 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted generator term (SubWithGetBorrow) leaked into the C output -- needs regeneration, cannot be repaired by hand from what is shown here
+out[2] = x2;
+out[3] = 0x3fffff;; // the second ';' is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e256m88x2e240m1/freeze.h b/src/Specific/solinas32_2e256m88x2e240m1/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e256m88x2e240m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition so the macro below always wins
+#define force_inline __attribute__((always_inline)) // GNU-style attribute syntax
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only: output pointer followed by 12 uint64_t operands passed by value
diff --git a/src/Specific/solinas32_2e266m3/femul.c b/src/Specific/solinas32_2e266m3/femul.c
new file mode 100644
index 000000000..00a030310
--- /dev/null
+++ b/src/Specific/solinas32_2e266m3/femul.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // unsigned integer in GCC's TI machine mode (16 bytes per the GCC machine-mode docs)
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only (clang and ICC excluded)
+// Presumably a workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 : alias the subborrow intrinsic names to GCC builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // discard the definition pulled in by the header included above before redefining
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // out, then two groups of 12 operands by value: x24..x5 and x46..x27; writes out[0..11]
+{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((0x2 * ((uint64_t)x19 * x35)) + ((0x2 * ((uint64_t)x21 * x33)) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))); // x48..x59: 64-bit sums of products taking one operand from each group; some products are doubled (0x2)
+{ uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((0x2 * ((uint64_t)x19 * x33)) + ((0x2 * ((uint64_t)x21 * x31)) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x3 * ((uint64_t)x24 * x46))); // from here on a trailing sub-sum is scaled by 0x3 (presumably the reduction constant for 2^266 - 3, per the directory name -- confirm)
+{ uint64_t x50 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((0x2 * ((uint64_t)x19 * x31)) + ((0x2 * ((uint64_t)x21 * x29)) + ((uint64_t)x23 * x27)))))))))) + (0x3 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x3 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
+{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x3 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
+{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((0x2 * ((uint64_t)x9 * x35)) + ((0x2 * ((uint64_t)x11 * x33)) + ((0x2 * ((uint64_t)x13 * x31)) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x3 * ((0x2 * ((uint64_t)x19 * x46)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + (0x2 * ((uint64_t)x24 * x41))))))));
+{ uint64_t x54 = ((((uint64_t)x5 * x37) + ((0x2 * ((uint64_t)x7 * x35)) + ((0x2 * ((uint64_t)x9 * x33)) + ((0x2 * ((uint64_t)x11 * x31)) + ((0x2 * ((uint64_t)x13 * x29)) + ((uint64_t)x15 * x27)))))) + (0x3 * (((uint64_t)x17 * x46) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((uint64_t)x24 * x39))))))));
+{ uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x3 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
+{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x3 * (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
+{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x3 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((0x2 * ((uint64_t)x19 * x41)) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x3 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x3 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + (((uint64_t)x17 * x39) + ((0x2 * ((uint64_t)x19 * x37)) + ((0x2 * ((uint64_t)x21 * x35)) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))); // x5*x27 is the only product here not scaled by 0x3
+{ uint32_t x60 = (uint32_t) (x59 >> 0x17); // carry chain starts: high part of x59 becomes the carry; NOTE(review): the uint32_t narrowing is lossless only if x59 < 2^55 -- depends on input bounds not visible here
+{ uint32_t x61 = ((uint32_t)x59 & 0x7fffff); // low 23 bits kept
+{ uint64_t x62 = (x60 + x58); // each step adds the previous carry to the next sum, then splits it again
+{ uint32_t x63 = (uint32_t) (x62 >> 0x16);
+{ uint32_t x64 = ((uint32_t)x62 & 0x3fffff); // low 22 bits kept
+{ uint64_t x65 = (x63 + x57);
+{ uint32_t x66 = (uint32_t) (x65 >> 0x16);
+{ uint32_t x67 = ((uint32_t)x65 & 0x3fffff);
+{ uint64_t x68 = (x66 + x56);
+{ uint32_t x69 = (uint32_t) (x68 >> 0x16);
+{ uint32_t x70 = ((uint32_t)x68 & 0x3fffff);
+{ uint64_t x71 = (x69 + x55);
+{ uint32_t x72 = (uint32_t) (x71 >> 0x16);
+{ uint32_t x73 = ((uint32_t)x71 & 0x3fffff);
+{ uint64_t x74 = (x72 + x54);
+{ uint32_t x75 = (uint32_t) (x74 >> 0x16);
+{ uint32_t x76 = ((uint32_t)x74 & 0x3fffff);
+{ uint64_t x77 = (x75 + x53);
+{ uint32_t x78 = (uint32_t) (x77 >> 0x17); // this step splits at 23 bits again, like the first one
+{ uint32_t x79 = ((uint32_t)x77 & 0x7fffff);
+{ uint64_t x80 = (x78 + x52);
+{ uint32_t x81 = (uint32_t) (x80 >> 0x16);
+{ uint32_t x82 = ((uint32_t)x80 & 0x3fffff);
+{ uint64_t x83 = (x81 + x51);
+{ uint32_t x84 = (uint32_t) (x83 >> 0x16);
+{ uint32_t x85 = ((uint32_t)x83 & 0x3fffff);
+{ uint64_t x86 = (x84 + x50);
+{ uint32_t x87 = (uint32_t) (x86 >> 0x16);
+{ uint32_t x88 = ((uint32_t)x86 & 0x3fffff);
+{ uint64_t x89 = (x87 + x49);
+{ uint32_t x90 = (uint32_t) (x89 >> 0x16);
+{ uint32_t x91 = ((uint32_t)x89 & 0x3fffff);
+{ uint64_t x92 = (x90 + x48);
+{ uint32_t x93 = (uint32_t) (x92 >> 0x16);
+{ uint32_t x94 = ((uint32_t)x92 & 0x3fffff);
+{ uint32_t x95 = (x61 + (0x3 * x93)); // final carry x93 is scaled by 0x3 and folded back into the first chain value; done in 32-bit arithmetic
+{ uint32_t x96 = (x95 >> 0x17);
+{ uint32_t x97 = (x95 & 0x7fffff);
+{ uint32_t x98 = (x96 + x64); // one more carry step into the second chain value
+{ uint32_t x99 = (x98 >> 0x16);
+{ uint32_t x100 = (x98 & 0x3fffff);
+out[0] = x94; // outputs are stored in reverse chain order: out[0] is the last value produced, out[11] the first
+out[1] = x91;
+out[2] = x88;
+out[3] = x85;
+out[4] = x82;
+out[5] = x79;
+out[6] = x76;
+out[7] = x73;
+out[8] = x70;
+out[9] = x99 + x67; // last carry is added without a further split, so this entry may exceed 22 bits
+out[10] = x100;
+out[11] = x97;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e266m3/femul.h b/src/Specific/solinas32_2e266m3/femul.h
new file mode 100644
index 000000000..21cfc1a1b
--- /dev/null
+++ b/src/Specific/solinas32_2e266m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition so the macro below always wins
+#define force_inline __attribute__((always_inline)) // GNU-style attribute syntax
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // prototype only: output pointer followed by two groups of 12 uint64_t operands passed by value
diff --git a/src/Specific/solinas32_2e266m3/fesquare.c b/src/Specific/solinas32_2e266m3/fesquare.c
new file mode 100644
index 000000000..825e82c23
--- /dev/null
+++ b/src/Specific/solinas32_2e266m3/fesquare.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // unsigned integer in GCC's TI machine mode (16 bytes per the GCC machine-mode docs)
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only (clang and ICC excluded)
+// Presumably a workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 : alias the subborrow intrinsic names to GCC builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // discard the definition pulled in by the header included above before redefining
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // out, then 12 operands x21..x2 by value; writes out[0..11]
+{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))); // x23..x34: 64-bit sums of pairwise products of the operands; symmetric pairs (e.g. x2*x21 and x21*x2) are computed separately rather than merged
+{ uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x3 * ((uint64_t)x21 * x21))); // from here on a trailing sub-sum is scaled by 0x3 (presumably the reduction constant for 2^266 - 3, per the directory name -- confirm)
+{ uint64_t x25 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x3 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
+{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
+{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x3 * ((0x2 * ((uint64_t)x16 * x21)) + ((0x2 * ((uint64_t)x18 * x22)) + ((0x2 * ((uint64_t)x20 * x20)) + ((0x2 * ((uint64_t)x22 * x18)) + (0x2 * ((uint64_t)x21 * x16))))))));
+{ uint64_t x29 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x3 * (((uint64_t)x14 * x21) + ((0x2 * ((uint64_t)x16 * x22)) + ((0x2 * ((uint64_t)x18 * x20)) + ((0x2 * ((uint64_t)x20 * x18)) + ((0x2 * ((uint64_t)x22 * x16)) + ((uint64_t)x21 * x14))))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x3 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
+{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x3 * (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x3 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + ((0x2 * ((uint64_t)x16 * x16)) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x3 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x3 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + (((uint64_t)x14 * x14) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))); // x2*x2 is the only product here not scaled by 0x3
+{ uint32_t x35 = (uint32_t) (x34 >> 0x17); // carry chain starts: high part of x34 becomes the carry; NOTE(review): the uint32_t narrowing is lossless only if x34 < 2^55 -- depends on input bounds not visible here
+{ uint32_t x36 = ((uint32_t)x34 & 0x7fffff); // low 23 bits kept
+{ uint64_t x37 = (x35 + x33); // each step adds the previous carry to the next sum, then splits it again
+{ uint32_t x38 = (uint32_t) (x37 >> 0x16);
+{ uint32_t x39 = ((uint32_t)x37 & 0x3fffff); // low 22 bits kept
+{ uint64_t x40 = (x38 + x32);
+{ uint32_t x41 = (uint32_t) (x40 >> 0x16);
+{ uint32_t x42 = ((uint32_t)x40 & 0x3fffff);
+{ uint64_t x43 = (x41 + x31);
+{ uint32_t x44 = (uint32_t) (x43 >> 0x16);
+{ uint32_t x45 = ((uint32_t)x43 & 0x3fffff);
+{ uint64_t x46 = (x44 + x30);
+{ uint32_t x47 = (uint32_t) (x46 >> 0x16);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3fffff);
+{ uint64_t x49 = (x47 + x29);
+{ uint32_t x50 = (uint32_t) (x49 >> 0x16);
+{ uint32_t x51 = ((uint32_t)x49 & 0x3fffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x17); // this step splits at 23 bits again, like the first one
+{ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+{ uint64_t x55 = (x53 + x27);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x16);
+{ uint32_t x57 = ((uint32_t)x55 & 0x3fffff);
+{ uint64_t x58 = (x56 + x26);
+{ uint32_t x59 = (uint32_t) (x58 >> 0x16);
+{ uint32_t x60 = ((uint32_t)x58 & 0x3fffff);
+{ uint64_t x61 = (x59 + x25);
+{ uint32_t x62 = (uint32_t) (x61 >> 0x16);
+{ uint32_t x63 = ((uint32_t)x61 & 0x3fffff);
+{ uint64_t x64 = (x62 + x24);
+{ uint32_t x65 = (uint32_t) (x64 >> 0x16);
+{ uint32_t x66 = ((uint32_t)x64 & 0x3fffff);
+{ uint64_t x67 = (x65 + x23);
+{ uint32_t x68 = (uint32_t) (x67 >> 0x16);
+{ uint32_t x69 = ((uint32_t)x67 & 0x3fffff);
+{ uint32_t x70 = (x36 + (0x3 * x68)); // final carry x68 is scaled by 0x3 and folded back into the first chain value; done in 32-bit arithmetic
+{ uint32_t x71 = (x70 >> 0x17);
+{ uint32_t x72 = (x70 & 0x7fffff);
+{ uint32_t x73 = (x71 + x39); // one more carry step into the second chain value
+{ uint32_t x74 = (x73 >> 0x16);
+{ uint32_t x75 = (x73 & 0x3fffff);
+out[0] = x69; // outputs are stored in reverse chain order: out[0] is the last value produced, out[11] the first
+out[1] = x66;
+out[2] = x63;
+out[3] = x60;
+out[4] = x57;
+out[5] = x54;
+out[6] = x51;
+out[7] = x48;
+out[8] = x45;
+out[9] = x74 + x42; // last carry is added without a further split, so this entry may exceed 22 bits
+out[10] = x75;
+out[11] = x72;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e266m3/fesquare.h b/src/Specific/solinas32_2e266m3/fesquare.h
new file mode 100644
index 000000000..5d10fa419
--- /dev/null
+++ b/src/Specific/solinas32_2e266m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition so the macro below always wins
+#define force_inline __attribute__((always_inline)) // GNU-style attribute syntax
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only: output pointer followed by 12 uint64_t operands passed by value
diff --git a/src/Specific/solinas32_2e266m3/freeze.c b/src/Specific/solinas32_2e266m3/freeze.c
new file mode 100644
index 000000000..fcba7bd38
--- /dev/null
+++ b/src/Specific/solinas32_2e266m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // unsigned integer in GCC's TI machine mode (16 bytes per the GCC machine-mode docs)
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only (clang and ICC excluded)
+// Presumably a workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 : alias the subborrow intrinsic names to GCC builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // discard the definition pulled in by the header included above before redefining
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature, so the definition does not parse as C; of the 12 operands only x2 is referenced below
+out[0] = uint32_t x24; // NOTE(review): a declaration used as an expression -- not valid C
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted generator term (SubWithGetBorrow) leaked into the C output -- needs regeneration, cannot be repaired by hand from what is shown here
+out[2] = x2;
+out[3] = 0x7ffffd;; // the second ';' is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e266m3/freeze.h b/src/Specific/solinas32_2e266m3/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e266m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition so the macro below always wins
+#define force_inline __attribute__((always_inline)) // GNU-style attribute syntax
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only: output pointer followed by 12 uint64_t operands passed by value
diff --git a/src/Specific/solinas32_2e285m9/femul.c b/src/Specific/solinas32_2e285m9/femul.c
new file mode 100644
index 000000000..54099ec33
--- /dev/null
+++ b/src/Specific/solinas32_2e285m9/femul.c
@@ -0,0 +1,101 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // unsigned integer in GCC's TI machine mode (16 bytes per the GCC machine-mode docs)
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only (clang and ICC excluded)
+// Presumably a workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294 : alias the subborrow intrinsic names to GCC builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // discard the definition pulled in by the header included above before redefining
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33) // out, then two groups of 15 operands by value: x30..x5 and x58..x33; writes out[0..14]
+{ uint64_t x60 = (((uint64_t)x5 * x58) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + (((uint64_t)x31 * x35) + ((uint64_t)x30 * x33))))))))))))))); // x60..x74: 64-bit sums of products taking one operand from each group
+{ uint64_t x61 = ((((uint64_t)x5 * x59) + (((uint64_t)x7 * x57) + (((uint64_t)x9 * x55) + (((uint64_t)x11 * x53) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + (((uint64_t)x29 * x35) + ((uint64_t)x31 * x33)))))))))))))) + (0x9 * ((uint64_t)x30 * x58))); // from here on a trailing sub-sum is scaled by 0x9 (presumably the reduction constant for 2^285 - 9, per the directory name -- confirm)
+{ uint64_t x62 = ((((uint64_t)x5 * x57) + (((uint64_t)x7 * x55) + (((uint64_t)x9 * x53) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + ((uint64_t)x29 * x33))))))))))))) + (0x9 * (((uint64_t)x31 * x58) + ((uint64_t)x30 * x59))));
+{ uint64_t x63 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x27 * x33)))))))))))) + (0x9 * (((uint64_t)x29 * x58) + (((uint64_t)x31 * x59) + ((uint64_t)x30 * x57)))));
+{ uint64_t x64 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + ((uint64_t)x25 * x33))))))))))) + (0x9 * (((uint64_t)x27 * x58) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + ((uint64_t)x30 * x55))))));
+{ uint64_t x65 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + ((uint64_t)x23 * x33)))))))))) + (0x9 * (((uint64_t)x25 * x58) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + ((uint64_t)x30 * x53)))))));
+{ uint64_t x66 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + ((uint64_t)x21 * x33))))))))) + (0x9 * (((uint64_t)x23 * x58) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x30 * x51))))))));
+{ uint64_t x67 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + ((uint64_t)x19 * x33)))))))) + (0x9 * (((uint64_t)x21 * x58) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + ((uint64_t)x30 * x49)))))))));
+{ uint64_t x68 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33))))))) + (0x9 * (((uint64_t)x19 * x58) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + ((uint64_t)x30 * x47))))))))));
+{ uint64_t x69 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + ((uint64_t)x15 * x33)))))) + (0x9 * (((uint64_t)x17 * x58) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + ((uint64_t)x30 * x45)))))))))));
+{ uint64_t x70 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + ((uint64_t)x13 * x33))))) + (0x9 * (((uint64_t)x15 * x58) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + ((uint64_t)x30 * x43))))))))))));
+{ uint64_t x71 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + ((uint64_t)x11 * x33)))) + (0x9 * (((uint64_t)x13 * x58) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + ((uint64_t)x30 * x41)))))))))))));
+{ uint64_t x72 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + ((uint64_t)x9 * x33))) + (0x9 * (((uint64_t)x11 * x58) + (((uint64_t)x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)x17 * x55) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + (((uint64_t)x31 * x41) + ((uint64_t)x30 * x39))))))))))))));
+{ uint64_t x73 = ((((uint64_t)x5 * x35) + ((uint64_t)x7 * x33)) + (0x9 * (((uint64_t)x9 * x58) + (((uint64_t)x11 * x59) + (((uint64_t)x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + (((uint64_t)x31 * x39) + ((uint64_t)x30 * x37)))))))))))))));
+{ uint64_t x74 = (((uint64_t)x5 * x33) + (0x9 * (((uint64_t)x7 * x58) + (((uint64_t)x9 * x59) + (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + (((uint64_t)x31 * x37) + ((uint64_t)x30 * x35)))))))))))))))); // x5*x33 is the only product here not scaled by 0x9
+{ uint32_t x75 = (uint32_t) (x74 >> 0x13); // carry chain starts: high part of x74 becomes the carry; NOTE(review): the uint32_t narrowing is lossless only if x74 < 2^51 -- depends on input bounds not visible here
+{ uint32_t x76 = ((uint32_t)x74 & 0x7ffff); // low 19 bits kept; every step in this chain splits at 19 bits
+{ uint64_t x77 = (x75 + x73); // each step adds the previous carry to the next sum, then splits it again
+{ uint32_t x78 = (uint32_t) (x77 >> 0x13);
+{ uint32_t x79 = ((uint32_t)x77 & 0x7ffff);
+{ uint64_t x80 = (x78 + x72);
+{ uint32_t x81 = (uint32_t) (x80 >> 0x13);
+{ uint32_t x82 = ((uint32_t)x80 & 0x7ffff);
+{ uint64_t x83 = (x81 + x71);
+{ uint32_t x84 = (uint32_t) (x83 >> 0x13);
+{ uint32_t x85 = ((uint32_t)x83 & 0x7ffff);
+{ uint64_t x86 = (x84 + x70);
+{ uint32_t x87 = (uint32_t) (x86 >> 0x13);
+{ uint32_t x88 = ((uint32_t)x86 & 0x7ffff);
+{ uint64_t x89 = (x87 + x69);
+{ uint32_t x90 = (uint32_t) (x89 >> 0x13);
+{ uint32_t x91 = ((uint32_t)x89 & 0x7ffff);
+{ uint64_t x92 = (x90 + x68);
+{ uint32_t x93 = (uint32_t) (x92 >> 0x13);
+{ uint32_t x94 = ((uint32_t)x92 & 0x7ffff);
+{ uint64_t x95 = (x93 + x67);
+{ uint32_t x96 = (uint32_t) (x95 >> 0x13);
+{ uint32_t x97 = ((uint32_t)x95 & 0x7ffff);
+{ uint64_t x98 = (x96 + x66);
+{ uint32_t x99 = (uint32_t) (x98 >> 0x13);
+{ uint32_t x100 = ((uint32_t)x98 & 0x7ffff);
+{ uint64_t x101 = (x99 + x65);
+{ uint32_t x102 = (uint32_t) (x101 >> 0x13);
+{ uint32_t x103 = ((uint32_t)x101 & 0x7ffff);
+{ uint64_t x104 = (x102 + x64);
+{ uint32_t x105 = (uint32_t) (x104 >> 0x13);
+{ uint32_t x106 = ((uint32_t)x104 & 0x7ffff);
+{ uint64_t x107 = (x105 + x63);
+{ uint32_t x108 = (uint32_t) (x107 >> 0x13);
+{ uint32_t x109 = ((uint32_t)x107 & 0x7ffff);
+{ uint64_t x110 = (x108 + x62);
+{ uint32_t x111 = (uint32_t) (x110 >> 0x13);
+{ uint32_t x112 = ((uint32_t)x110 & 0x7ffff);
+{ uint64_t x113 = (x111 + x61);
+{ uint32_t x114 = (uint32_t) (x113 >> 0x13);
+{ uint32_t x115 = ((uint32_t)x113 & 0x7ffff);
+{ uint64_t x116 = (x114 + x60);
+{ uint32_t x117 = (uint32_t) (x116 >> 0x13);
+{ uint32_t x118 = ((uint32_t)x116 & 0x7ffff);
+{ uint32_t x119 = (x76 + (0x9 * x117)); // final carry x117 is scaled by 0x9 and folded back into the first chain value; done in 32-bit arithmetic
+{ uint32_t x120 = (x119 >> 0x13);
+{ uint32_t x121 = (x119 & 0x7ffff);
+{ uint32_t x122 = (x120 + x79); // one more carry step into the second chain value
+{ uint32_t x123 = (x122 >> 0x13);
+{ uint32_t x124 = (x122 & 0x7ffff);
+out[0] = x118; // outputs are stored in reverse chain order: out[0] is the last value produced, out[14] the first
+out[1] = x115;
+out[2] = x112;
+out[3] = x109;
+out[4] = x106;
+out[5] = x103;
+out[6] = x100;
+out[7] = x97;
+out[8] = x94;
+out[9] = x91;
+out[10] = x88;
+out[11] = x85;
+out[12] = x123 + x82; // last carry is added without a further split, so this entry may exceed 19 bits
+out[13] = x124;
+out[14] = x121;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[15];
diff --git a/src/Specific/solinas32_2e285m9/femul.h b/src/Specific/solinas32_2e285m9/femul.h
new file mode 100644
index 000000000..5d9164651
--- /dev/null
+++ b/src/Specific/solinas32_2e285m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition so the macro below always wins
+#define force_inline __attribute__((always_inline)) // GNU-style attribute syntax
+
+void force_inline femul(uint64_t* out, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x58, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33); // prototype only: output pointer followed by two groups of 15 uint64_t operands passed by value
diff --git a/src/Specific/solinas32_2e285m9/fesquare.c b/src/Specific/solinas32_2e285m9/fesquare.c
new file mode 100644
index 000000000..729af2c0a
--- /dev/null
+++ b/src/Specific/solinas32_2e285m9/fesquare.c
@@ -0,0 +1,101 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 15-limb value passed by value (x2 is limb 0, x27 is limb 14) and stores 15 limbs to out[0..14], top limb first.
+{ uint64_t x29 = (((uint64_t)x2 * x27) + (((uint64_t)x4 * x28) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + (((uint64_t)x28 * x4) + ((uint64_t)x27 * x2))))))))))))))); // top column (limb 14): every pairwise product whose limb indices sum to 14
+{ uint64_t x30 = ((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2)))))))))))))) + (0x9 * ((uint64_t)x27 * x27))); // limb 13 column; the product that overflows past limb 14 is folded back in with factor 0x9
+{ uint64_t x31 = ((((uint64_t)x2 * x26) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x26 * x2))))))))))))) + (0x9 * (((uint64_t)x28 * x27) + ((uint64_t)x27 * x28))));
+{ uint64_t x32 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x9 * (((uint64_t)x26 * x27) + (((uint64_t)x28 * x28) + ((uint64_t)x27 * x26)))));
+{ uint64_t x33 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x9 * (((uint64_t)x24 * x27) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + ((uint64_t)x27 * x24))))));
+{ uint64_t x34 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x9 * (((uint64_t)x22 * x27) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + ((uint64_t)x27 * x22)))))));
+{ uint64_t x35 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x9 * (((uint64_t)x20 * x27) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + ((uint64_t)x27 * x20))))))));
+{ uint64_t x36 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x27) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x27 * x18)))))))));
+{ uint64_t x37 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x27) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + ((uint64_t)x27 * x16))))))))));
+{ uint64_t x38 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x27) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + ((uint64_t)x27 * x14)))))))))));
+{ uint64_t x39 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x27) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + ((uint64_t)x27 * x12))))))))))));
+{ uint64_t x40 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x27) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + ((uint64_t)x27 * x10)))))))))))));
+{ uint64_t x41 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x27) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + ((uint64_t)x27 * x8))))))))))))));
+{ uint64_t x42 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x27) + (((uint64_t)x8 * x28) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + (((uint64_t)x28 * x8) + ((uint64_t)x27 * x6)))))))))))))));
+{ uint64_t x43 = (((uint64_t)x2 * x2) + (0x9 * (((uint64_t)x4 * x27) + (((uint64_t)x6 * x28) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + (((uint64_t)x28 * x6) + ((uint64_t)x27 * x4)))))))))))))))); // limb 0 column: x2*x2 plus 0x9 times every wrapped product
+{ uint32_t x44 = (uint32_t) (x43 >> 0x13); // carry out of limb 0: everything above the low 19 (0x13) bits
+{ uint32_t x45 = ((uint32_t)x43 & 0x7ffff); // limb 0 reduced to its low 19 bits
+{ uint64_t x46 = (x44 + x42); // limb 1 column plus the incoming carry; the same shift/mask/add triple repeats for every limb below
+{ uint32_t x47 = (uint32_t) (x46 >> 0x13);
+{ uint32_t x48 = ((uint32_t)x46 & 0x7ffff);
+{ uint64_t x49 = (x47 + x41);
+{ uint32_t x50 = (uint32_t) (x49 >> 0x13);
+{ uint32_t x51 = ((uint32_t)x49 & 0x7ffff);
+{ uint64_t x52 = (x50 + x40);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x13);
+{ uint32_t x54 = ((uint32_t)x52 & 0x7ffff);
+{ uint64_t x55 = (x53 + x39);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x13);
+{ uint32_t x57 = ((uint32_t)x55 & 0x7ffff);
+{ uint64_t x58 = (x56 + x38);
+{ uint32_t x59 = (uint32_t) (x58 >> 0x13);
+{ uint32_t x60 = ((uint32_t)x58 & 0x7ffff);
+{ uint64_t x61 = (x59 + x37);
+{ uint32_t x62 = (uint32_t) (x61 >> 0x13);
+{ uint32_t x63 = ((uint32_t)x61 & 0x7ffff);
+{ uint64_t x64 = (x62 + x36);
+{ uint32_t x65 = (uint32_t) (x64 >> 0x13);
+{ uint32_t x66 = ((uint32_t)x64 & 0x7ffff);
+{ uint64_t x67 = (x65 + x35);
+{ uint32_t x68 = (uint32_t) (x67 >> 0x13);
+{ uint32_t x69 = ((uint32_t)x67 & 0x7ffff);
+{ uint64_t x70 = (x68 + x34);
+{ uint32_t x71 = (uint32_t) (x70 >> 0x13);
+{ uint32_t x72 = ((uint32_t)x70 & 0x7ffff);
+{ uint64_t x73 = (x71 + x33);
+{ uint32_t x74 = (uint32_t) (x73 >> 0x13);
+{ uint32_t x75 = ((uint32_t)x73 & 0x7ffff);
+{ uint64_t x76 = (x74 + x32);
+{ uint32_t x77 = (uint32_t) (x76 >> 0x13);
+{ uint32_t x78 = ((uint32_t)x76 & 0x7ffff);
+{ uint64_t x79 = (x77 + x31);
+{ uint32_t x80 = (uint32_t) (x79 >> 0x13);
+{ uint32_t x81 = ((uint32_t)x79 & 0x7ffff);
+{ uint64_t x82 = (x80 + x30);
+{ uint32_t x83 = (uint32_t) (x82 >> 0x13);
+{ uint32_t x84 = ((uint32_t)x82 & 0x7ffff);
+{ uint64_t x85 = (x83 + x29);
+{ uint32_t x86 = (uint32_t) (x85 >> 0x13); // carry out of the top limb (limb 14)
+{ uint32_t x87 = ((uint32_t)x85 & 0x7ffff);
+{ uint32_t x88 = (x45 + (0x9 * x86)); // wrap the top carry around into limb 0, scaled by 0x9
+{ uint32_t x89 = (x88 >> 0x13);
+{ uint32_t x90 = (x88 & 0x7ffff);
+{ uint32_t x91 = (x89 + x48); // second, short carry pass: limb 0 into limb 1
+{ uint32_t x92 = (x91 >> 0x13);
+{ uint32_t x93 = (x91 & 0x7ffff);
+out[0] = x87; // top limb (limb 14); outputs are stored most-significant limb first
+out[1] = x84;
+out[2] = x81;
+out[3] = x78;
+out[4] = x75;
+out[5] = x72;
+out[6] = x69;
+out[7] = x66;
+out[8] = x63;
+out[9] = x60;
+out[10] = x57;
+out[11] = x54;
+out[12] = x92 + x51; // limb 2 absorbs the last carry; no further propagation is performed here
+out[13] = x93;
+out[14] = x90; // limb 0
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[15];
diff --git a/src/Specific/solinas32_2e285m9/fesquare.h b/src/Specific/solinas32_2e285m9/fesquare.h
new file mode 100644
index 000000000..900a6956e
--- /dev/null
+++ b/src/Specific/solinas32_2e285m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: output pointer plus 15 by-value uint64_t limbs; the matching fesquare.c definition treats x2 as limb 0 and x27 as limb 14.
diff --git a/src/Specific/solinas32_2e285m9/freeze.c b/src/Specific/solinas32_2e285m9/freeze.c
new file mode 100644
index 000000000..04cab1e28
--- /dev/null
+++ b/src/Specific/solinas32_2e285m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration and raw "Op Syntax..." text used as expressions); it looks like an un-rendered code-generator term -- regenerate rather than hand-edit.
+out[0] = uint32_t x30; // NOTE(review): a declaration cannot appear on the right of '='; x30 is never defined in this file
+out[1] = uint8_t x31 = Op Syntax.SubWithGetBorrow 19 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): raw operator dump, presumably a 19-bit subtract-with-borrow that was not printed as C -- confirm with the generator
+out[2] = x2;
+out[3] = 0x7fff7;; // 0x7fff7 == 2^19 - 9; presumably the lowest 19-bit limb of the modulus -- TODO confirm
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e285m9/freeze.h b/src/Specific/solinas32_2e285m9/freeze.h
new file mode 100644
index 000000000..ffbccdea2
--- /dev/null
+++ b/src/Specific/solinas32_2e285m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: output pointer plus 15 by-value uint64_t limbs (same parameter list as fesquare for this prime).
diff --git a/src/Specific/solinas32_2e291m19/femul.c b/src/Specific/solinas32_2e291m19/femul.c
new file mode 100644
index 000000000..204f8c0ab
--- /dev/null
+++ b/src/Specific/solinas32_2e291m19/femul.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // Multiplies two 12-limb values passed by value (first operand x24,x25,x23..x5 with x5 = limb 0; second operand x46,x47,x45..x27 with x27 = limb 0) and stores 12 limbs to out[0..11], top limb first.
+{ uint64_t x48 = (((uint64_t)x5 * x46) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((0x2 * ((uint64_t)x23 * x31)) + ((0x2 * ((uint64_t)x25 * x29)) + ((uint64_t)x24 * x27)))))))))))); // top column (limb 11); some products carry an extra factor 0x2, presumably compensating for the mixed 25/24-bit limb widths -- confirm with the generator
+{ uint64_t x49 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((0x2 * ((uint64_t)x23 * x29)) + ((uint64_t)x25 * x27))))))))))) + (0x13 * ((uint64_t)x24 * x46))); // limb 10 column; the product that overflows past limb 11 is folded back in with factor 0x13
+{ uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x13 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+{ uint64_t x51 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + (((uint64_t)x13 * x35) + ((0x2 * ((uint64_t)x15 * x33)) + ((0x2 * ((uint64_t)x17 * x31)) + ((0x2 * ((uint64_t)x19 * x29)) + ((uint64_t)x21 * x27))))))))) + (0x13 * ((0x2 * ((uint64_t)x23 * x46)) + ((0x2 * ((uint64_t)x25 * x47)) + (0x2 * ((uint64_t)x24 * x45))))));
+{ uint64_t x52 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((0x2 * ((uint64_t)x15 * x31)) + ((0x2 * ((uint64_t)x17 * x29)) + ((uint64_t)x19 * x27)))))))) + (0x13 * (((uint64_t)x21 * x46) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((uint64_t)x24 * x43))))));
+{ uint64_t x53 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((0x2 * ((uint64_t)x15 * x29)) + ((uint64_t)x17 * x27))))))) + (0x13 * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
+{ uint64_t x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) + (0x13 * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39))))))));
+{ uint64_t x55 = ((((uint64_t)x5 * x35) + ((0x2 * ((uint64_t)x7 * x33)) + ((0x2 * ((uint64_t)x9 * x31)) + ((0x2 * ((uint64_t)x11 * x29)) + ((uint64_t)x13 * x27))))) + (0x13 * ((0x2 * ((uint64_t)x15 * x46)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + (0x2 * ((uint64_t)x24 * x37))))))))));
+{ uint64_t x56 = ((((uint64_t)x5 * x33) + ((0x2 * ((uint64_t)x7 * x31)) + ((0x2 * ((uint64_t)x9 * x29)) + ((uint64_t)x11 * x27)))) + (0x13 * (((uint64_t)x13 * x46) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x24 * x35))))))))));
+{ uint64_t x57 = ((((uint64_t)x5 * x31) + ((0x2 * ((uint64_t)x7 * x29)) + ((uint64_t)x9 * x27))) + (0x13 * (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+{ uint64_t x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) + (0x13 * (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+{ uint64_t x59 = (((uint64_t)x5 * x27) + (0x13 * ((0x2 * ((uint64_t)x7 * x46)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + (((uint64_t)x21 * x35) + ((0x2 * ((uint64_t)x23 * x33)) + ((0x2 * ((uint64_t)x25 * x31)) + (0x2 * ((uint64_t)x24 * x29)))))))))))))); // limb 0 column: x5*x27 plus 0x13 times every wrapped product
+{ uint64_t x60 = (x59 >> 0x19); // carry out of limb 0, which keeps 25 (0x19) bits
+{ uint32_t x61 = ((uint32_t)x59 & 0x1ffffff); // limb 0 reduced to its low 25 bits
+{ uint64_t x62 = (x60 + x58); // limb 1 column plus the incoming carry
+{ uint64_t x63 = (x62 >> 0x18); // limb 1 keeps 24 (0x18) bits; widths repeat 25,24,24,24 up the chain
+{ uint32_t x64 = ((uint32_t)x62 & 0xffffff);
+{ uint64_t x65 = (x63 + x57);
+{ uint64_t x66 = (x65 >> 0x18);
+{ uint32_t x67 = ((uint32_t)x65 & 0xffffff);
+{ uint64_t x68 = (x66 + x56);
+{ uint64_t x69 = (x68 >> 0x18);
+{ uint32_t x70 = ((uint32_t)x68 & 0xffffff);
+{ uint64_t x71 = (x69 + x55);
+{ uint64_t x72 = (x71 >> 0x19);
+{ uint32_t x73 = ((uint32_t)x71 & 0x1ffffff);
+{ uint64_t x74 = (x72 + x54);
+{ uint64_t x75 = (x74 >> 0x18);
+{ uint32_t x76 = ((uint32_t)x74 & 0xffffff);
+{ uint64_t x77 = (x75 + x53);
+{ uint64_t x78 = (x77 >> 0x18);
+{ uint32_t x79 = ((uint32_t)x77 & 0xffffff);
+{ uint64_t x80 = (x78 + x52);
+{ uint64_t x81 = (x80 >> 0x18);
+{ uint32_t x82 = ((uint32_t)x80 & 0xffffff);
+{ uint64_t x83 = (x81 + x51);
+{ uint64_t x84 = (x83 >> 0x19);
+{ uint32_t x85 = ((uint32_t)x83 & 0x1ffffff);
+{ uint64_t x86 = (x84 + x50);
+{ uint64_t x87 = (x86 >> 0x18);
+{ uint32_t x88 = ((uint32_t)x86 & 0xffffff);
+{ uint64_t x89 = (x87 + x49);
+{ uint64_t x90 = (x89 >> 0x18);
+{ uint32_t x91 = ((uint32_t)x89 & 0xffffff);
+{ uint64_t x92 = (x90 + x48);
+{ uint64_t x93 = (x92 >> 0x18); // carry out of the top limb (limb 11)
+{ uint32_t x94 = ((uint32_t)x92 & 0xffffff);
+{ uint64_t x95 = (x61 + (0x13 * x93)); // wrap the top carry around into limb 0, scaled by 0x13
+{ uint32_t x96 = (uint32_t) (x95 >> 0x19);
+{ uint32_t x97 = ((uint32_t)x95 & 0x1ffffff);
+{ uint32_t x98 = (x96 + x64); // second, short carry pass: limb 0 into limb 1
+{ uint32_t x99 = (x98 >> 0x18);
+{ uint32_t x100 = (x98 & 0xffffff);
+out[0] = x94; // top limb (limb 11); outputs are stored most-significant limb first
+out[1] = x91;
+out[2] = x88;
+out[3] = x85;
+out[4] = x82;
+out[5] = x79;
+out[6] = x76;
+out[7] = x73;
+out[8] = x70;
+out[9] = x99 + x67; // limb 2 absorbs the last carry; no further propagation is performed here
+out[10] = x100;
+out[11] = x97; // limb 0
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e291m19/femul.h b/src/Specific/solinas32_2e291m19/femul.h
new file mode 100644
index 000000000..21cfc1a1b
--- /dev/null
+++ b/src/Specific/solinas32_2e291m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // Prototype only: output pointer plus 24 by-value uint64_t limbs; the matching femul.c definition uses x24..x5 and x46..x27 as the two 12-limb operands.
diff --git a/src/Specific/solinas32_2e291m19/fesquare.c b/src/Specific/solinas32_2e291m19/fesquare.c
new file mode 100644
index 000000000..49f5a676c
--- /dev/null
+++ b/src/Specific/solinas32_2e291m19/fesquare.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 12-limb value passed by value (x2 is limb 0, x21 is limb 11) and stores 12 limbs to out[0..11], top limb first.
+{ uint64_t x23 = (((uint64_t)x2 * x21) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x21 * x2)))))))))))); // top column (limb 11); some products carry an extra factor 0x2, presumably compensating for the mixed 25/24-bit limb widths -- confirm with the generator
+{ uint64_t x24 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x13 * ((uint64_t)x21 * x21))); // limb 10 column; the product that overflows past limb 11 is folded back in with factor 0x13
+{ uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x13 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+{ uint64_t x26 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x13 * ((0x2 * ((uint64_t)x20 * x21)) + ((0x2 * ((uint64_t)x22 * x22)) + (0x2 * ((uint64_t)x21 * x20))))));
+{ uint64_t x27 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x13 * (((uint64_t)x18 * x21) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + ((uint64_t)x21 * x18))))));
+{ uint64_t x28 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x13 * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
+{ uint64_t x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x13 * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14))))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x13 * ((0x2 * ((uint64_t)x12 * x21)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + (0x2 * ((uint64_t)x21 * x12))))))))));
+{ uint64_t x31 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x13 * (((uint64_t)x10 * x21) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((uint64_t)x21 * x10))))))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x13 * (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+{ uint64_t x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x13 * (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+{ uint64_t x34 = (((uint64_t)x2 * x2) + (0x13 * ((0x2 * ((uint64_t)x4 * x21)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + (((uint64_t)x10 * x18) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + (0x2 * ((uint64_t)x21 * x4)))))))))))))); // limb 0 column: x2*x2 plus 0x13 times every wrapped product
+{ uint64_t x35 = (x34 >> 0x19); // carry out of limb 0, which keeps 25 (0x19) bits
+{ uint32_t x36 = ((uint32_t)x34 & 0x1ffffff); // limb 0 reduced to its low 25 bits
+{ uint64_t x37 = (x35 + x33); // limb 1 column plus the incoming carry
+{ uint64_t x38 = (x37 >> 0x18); // limb 1 keeps 24 (0x18) bits; widths repeat 25,24,24,24 up the chain
+{ uint32_t x39 = ((uint32_t)x37 & 0xffffff);
+{ uint64_t x40 = (x38 + x32);
+{ uint64_t x41 = (x40 >> 0x18);
+{ uint32_t x42 = ((uint32_t)x40 & 0xffffff);
+{ uint64_t x43 = (x41 + x31);
+{ uint64_t x44 = (x43 >> 0x18);
+{ uint32_t x45 = ((uint32_t)x43 & 0xffffff);
+{ uint64_t x46 = (x44 + x30);
+{ uint64_t x47 = (x46 >> 0x19);
+{ uint32_t x48 = ((uint32_t)x46 & 0x1ffffff);
+{ uint64_t x49 = (x47 + x29);
+{ uint64_t x50 = (x49 >> 0x18);
+{ uint32_t x51 = ((uint32_t)x49 & 0xffffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint64_t x53 = (x52 >> 0x18);
+{ uint32_t x54 = ((uint32_t)x52 & 0xffffff);
+{ uint64_t x55 = (x53 + x27);
+{ uint64_t x56 = (x55 >> 0x18);
+{ uint32_t x57 = ((uint32_t)x55 & 0xffffff);
+{ uint64_t x58 = (x56 + x26);
+{ uint64_t x59 = (x58 >> 0x19);
+{ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+{ uint64_t x61 = (x59 + x25);
+{ uint64_t x62 = (x61 >> 0x18);
+{ uint32_t x63 = ((uint32_t)x61 & 0xffffff);
+{ uint64_t x64 = (x62 + x24);
+{ uint64_t x65 = (x64 >> 0x18);
+{ uint32_t x66 = ((uint32_t)x64 & 0xffffff);
+{ uint64_t x67 = (x65 + x23);
+{ uint64_t x68 = (x67 >> 0x18); // carry out of the top limb (limb 11)
+{ uint32_t x69 = ((uint32_t)x67 & 0xffffff);
+{ uint64_t x70 = (x36 + (0x13 * x68)); // wrap the top carry around into limb 0, scaled by 0x13
+{ uint32_t x71 = (uint32_t) (x70 >> 0x19);
+{ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+{ uint32_t x73 = (x71 + x39); // second, short carry pass: limb 0 into limb 1
+{ uint32_t x74 = (x73 >> 0x18);
+{ uint32_t x75 = (x73 & 0xffffff);
+out[0] = x69; // top limb (limb 11); outputs are stored most-significant limb first
+out[1] = x66;
+out[2] = x63;
+out[3] = x60;
+out[4] = x57;
+out[5] = x54;
+out[6] = x51;
+out[7] = x48;
+out[8] = x45;
+out[9] = x74 + x42; // limb 2 absorbs the last carry; no further propagation is performed here
+out[10] = x75;
+out[11] = x72; // limb 0
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e291m19/fesquare.h b/src/Specific/solinas32_2e291m19/fesquare.h
new file mode 100644
index 000000000..5d10fa419
--- /dev/null
+++ b/src/Specific/solinas32_2e291m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: output pointer plus 12 by-value uint64_t limbs; the matching fesquare.c definition treats x2 as limb 0 and x21 as limb 11.
diff --git a/src/Specific/solinas32_2e291m19/freeze.c b/src/Specific/solinas32_2e291m19/freeze.c
new file mode 100644
index 000000000..bb62d2336
--- /dev/null
+++ b/src/Specific/solinas32_2e291m19/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration and raw "Op Syntax..." text used as expressions); it looks like an un-rendered code-generator term -- regenerate rather than hand-edit.
+out[0] = uint32_t x24; // NOTE(review): a declaration cannot appear on the right of '='; x24 is never defined in this file
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 25 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): raw operator dump, presumably a 25-bit subtract-with-borrow that was not printed as C -- confirm with the generator
+out[2] = x2;
+out[3] = 0x1ffffed;; // 0x1ffffed == 2^25 - 19; presumably the lowest 25-bit limb of the modulus -- TODO confirm
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e291m19/freeze.h b/src/Specific/solinas32_2e291m19/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e291m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: output pointer plus 12 by-value uint64_t limbs (same parameter list as fesquare for this prime).
diff --git a/src/Specific/solinas32_2e321m9/femul.c b/src/Specific/solinas32_2e321m9/femul.c
new file mode 100644
index 000000000..17f2e6012
--- /dev/null
+++ b/src/Specific/solinas32_2e321m9/femul.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // uint128_t: unsigned integer given GCC machine mode TI (a 128-bit integer mode)
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // true for GCC only; clang and the Intel compiler are excluded
+// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name to the GCC builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same remapping for the 64-bit variant
+#endif
+
+#undef force_inline // discard any earlier definition (e.g. one pulled in by the headers above) before redefining
+#define force_inline __attribute__((always_inline)) // force_inline expands to GCC's always_inline function attribute
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // Multiplies two 16-limb operands (first operand: x32 ... x5, second: x62 ... x35) and writes 16 limbs to out[0..15]: 16 sum-of-products columns (x64..x79), overflow terms folded in with factor 0x9, then a carry chain split at 21/20 bits. Consistent with multiplication modulo 2^321 - 9 as the directory name suggests - TODO confirm against the generator spec.
+{ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35)))))))))))))))); // x64: last column reached by the carry chain below; it has no 0x9 term
+{ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x9 * ((uint64_t)x32 * x62))); // x65..x78: each column is a direct sum of products plus 0x9 times a second group of products
+{ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x9 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+{ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x9 * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+{ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x9 * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+{ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x9 * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+{ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x9 * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+{ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x9 * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+{ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x9 * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+{ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x9 * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+{ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x9 * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+{ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x9 * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+{ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x9 * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+{ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x9 * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+{ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x9 * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+{ uint64_t x79 = (((uint64_t)x5 * x35) + (0x9 * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37)))))))))))))))))); // x79: first column of the carry chain; x5 * x35 plus 0x9 times the doubled remaining products
+{ uint32_t x80 = (uint32_t) (x79 >> 0x15); // carry out of the first column: bits 21 and above of x79
+{ uint32_t x81 = ((uint32_t)x79 & 0x1fffff); // low 21 bits of x79 stay in the first limb
+{ uint64_t x82 = (x80 + x78); // fold the carry into the next column
+{ uint32_t x83 = (uint32_t) (x82 >> 0x14); // from here on every column is split at 20 bits (0x14)
+{ uint32_t x84 = ((uint32_t)x82 & 0xfffff);
+{ uint64_t x85 = (x83 + x77);
+{ uint32_t x86 = (uint32_t) (x85 >> 0x14);
+{ uint32_t x87 = ((uint32_t)x85 & 0xfffff);
+{ uint64_t x88 = (x86 + x76);
+{ uint32_t x89 = (uint32_t) (x88 >> 0x14);
+{ uint32_t x90 = ((uint32_t)x88 & 0xfffff);
+{ uint64_t x91 = (x89 + x75);
+{ uint32_t x92 = (uint32_t) (x91 >> 0x14);
+{ uint32_t x93 = ((uint32_t)x91 & 0xfffff);
+{ uint64_t x94 = (x92 + x74);
+{ uint32_t x95 = (uint32_t) (x94 >> 0x14);
+{ uint32_t x96 = ((uint32_t)x94 & 0xfffff);
+{ uint64_t x97 = (x95 + x73);
+{ uint32_t x98 = (uint32_t) (x97 >> 0x14);
+{ uint32_t x99 = ((uint32_t)x97 & 0xfffff);
+{ uint64_t x100 = (x98 + x72);
+{ uint32_t x101 = (uint32_t) (x100 >> 0x14);
+{ uint32_t x102 = ((uint32_t)x100 & 0xfffff);
+{ uint64_t x103 = (x101 + x71);
+{ uint32_t x104 = (uint32_t) (x103 >> 0x14);
+{ uint32_t x105 = ((uint32_t)x103 & 0xfffff);
+{ uint64_t x106 = (x104 + x70);
+{ uint32_t x107 = (uint32_t) (x106 >> 0x14);
+{ uint32_t x108 = ((uint32_t)x106 & 0xfffff);
+{ uint64_t x109 = (x107 + x69);
+{ uint32_t x110 = (uint32_t) (x109 >> 0x14);
+{ uint32_t x111 = ((uint32_t)x109 & 0xfffff);
+{ uint64_t x112 = (x110 + x68);
+{ uint32_t x113 = (uint32_t) (x112 >> 0x14);
+{ uint32_t x114 = ((uint32_t)x112 & 0xfffff);
+{ uint64_t x115 = (x113 + x67);
+{ uint32_t x116 = (uint32_t) (x115 >> 0x14);
+{ uint32_t x117 = ((uint32_t)x115 & 0xfffff);
+{ uint64_t x118 = (x116 + x66);
+{ uint32_t x119 = (uint32_t) (x118 >> 0x14);
+{ uint32_t x120 = ((uint32_t)x118 & 0xfffff);
+{ uint64_t x121 = (x119 + x65);
+{ uint32_t x122 = (uint32_t) (x121 >> 0x14);
+{ uint32_t x123 = ((uint32_t)x121 & 0xfffff);
+{ uint64_t x124 = (x122 + x64);
+{ uint32_t x125 = (uint32_t) (x124 >> 0x14); // carry out of the last column
+{ uint32_t x126 = ((uint32_t)x124 & 0xfffff);
+{ uint32_t x127 = (x81 + (0x9 * x125)); // wrap-around: the final carry re-enters the first limb scaled by 0x9
+{ uint32_t x128 = (x127 >> 0x15); // second, partial carry pass; it touches only the first two limbs
+{ uint32_t x129 = (x127 & 0x1fffff);
+{ uint32_t x130 = (x128 + x84);
+{ uint32_t x131 = (x130 >> 0x14); // leftover carry; added to x87 when out[13] is stored
+{ uint32_t x132 = (x130 & 0xfffff);
+out[0] = x126; // out[0] receives the limb from the last column of the chain (x64)
+out[1] = x123;
+out[2] = x120;
+out[3] = x117;
+out[4] = x114;
+out[5] = x111;
+out[6] = x108;
+out[7] = x105;
+out[8] = x102;
+out[9] = x99;
+out[10] = x96;
+out[11] = x93;
+out[12] = x90;
+out[13] = x131 + x87; // x87 plus the leftover carry x131; the sum is not masked again here
+out[14] = x132;
+out[15] = x129; // out[15] receives the limb from the first column of the chain (x79)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e321m9/femul.h b/src/Specific/solinas32_2e321m9/femul.h
new file mode 100644
index 000000000..c4089fc7d
--- /dev/null
+++ b/src/Specific/solinas32_2e321m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition before redefining
+#define force_inline __attribute__((always_inline)) // force_inline expands to GCC's always_inline function attribute
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // prototype matching the definition in femul.c: one output pointer, then the 16 limbs of each operand (x32 ... x5, then x62 ... x35); femul.c notes the caller supplies uint64_t out[16]
diff --git a/src/Specific/solinas32_2e321m9/femulDisplay.log b/src/Specific/solinas32_2e321m9/femulDisplay.log
index 7b3312579..e698eb990 100644
--- a/src/Specific/solinas32_2e321m9/femulDisplay.log
+++ b/src/Specific/solinas32_2e321m9/femulDisplay.log
@@ -2,75 +2,75 @@
Interp-η
(λ var : Syntax.base_type → Type,
λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
- uint64_t x64 = (uint64_t) x5 * x62 + (0x2 * ((uint64_t) x7 * x63) + (0x2 * ((uint64_t) x9 * x61) + (0x2 * ((uint64_t) x11 * x59) + (0x2 * ((uint64_t) x13 * x57) + (0x2 * ((uint64_t) x15 * x55) + (0x2 * ((uint64_t) x17 * x53) + (0x2 * ((uint64_t) x19 * x51) + (0x2 * ((uint64_t) x21 * x49) + (0x2 * ((uint64_t) x23 * x47) + (0x2 * ((uint64_t) x25 * x45) + (0x2 * ((uint64_t) x27 * x43) + (0x2 * ((uint64_t) x29 * x41) + (0x2 * ((uint64_t) x31 * x39) + (0x2 * ((uint64_t) x33 * x37) + (uint64_t) x32 * x35))))))))))))));
- uint64_t x65 = (uint64_t) x5 * x63 + (0x2 * ((uint64_t) x7 * x61) + (0x2 * ((uint64_t) x9 * x59) + (0x2 * ((uint64_t) x11 * x57) + (0x2 * ((uint64_t) x13 * x55) + (0x2 * ((uint64_t) x15 * x53) + (0x2 * ((uint64_t) x17 * x51) + (0x2 * ((uint64_t) x19 * x49) + (0x2 * ((uint64_t) x21 * x47) + (0x2 * ((uint64_t) x23 * x45) + (0x2 * ((uint64_t) x25 * x43) + (0x2 * ((uint64_t) x27 * x41) + (0x2 * ((uint64_t) x29 * x39) + (0x2 * ((uint64_t) x31 * x37) + (uint64_t) x33 * x35))))))))))))) + 0x9 * ((uint64_t) x32 * x62);
- uint64_t x66 = (uint64_t) x5 * x61 + (0x2 * ((uint64_t) x7 * x59) + (0x2 * ((uint64_t) x9 * x57) + (0x2 * ((uint64_t) x11 * x55) + (0x2 * ((uint64_t) x13 * x53) + (0x2 * ((uint64_t) x15 * x51) + (0x2 * ((uint64_t) x17 * x49) + (0x2 * ((uint64_t) x19 * x47) + (0x2 * ((uint64_t) x21 * x45) + (0x2 * ((uint64_t) x23 * x43) + (0x2 * ((uint64_t) x25 * x41) + (0x2 * ((uint64_t) x27 * x39) + (0x2 * ((uint64_t) x29 * x37) + (uint64_t) x31 * x35)))))))))))) + 0x9 * ((uint64_t) x33 * x62 + (uint64_t) x32 * x63);
- uint64_t x67 = (uint64_t) x5 * x59 + (0x2 * ((uint64_t) x7 * x57) + (0x2 * ((uint64_t) x9 * x55) + (0x2 * ((uint64_t) x11 * x53) + (0x2 * ((uint64_t) x13 * x51) + (0x2 * ((uint64_t) x15 * x49) + (0x2 * ((uint64_t) x17 * x47) + (0x2 * ((uint64_t) x19 * x45) + (0x2 * ((uint64_t) x21 * x43) + (0x2 * ((uint64_t) x23 * x41) + (0x2 * ((uint64_t) x25 * x39) + (0x2 * ((uint64_t) x27 * x37) + (uint64_t) x29 * x35))))))))))) + 0x9 * ((uint64_t) x31 * x62 + ((uint64_t) x33 * x63 + (uint64_t) x32 * x61));
- uint64_t x68 = (uint64_t) x5 * x57 + (0x2 * ((uint64_t) x7 * x55) + (0x2 * ((uint64_t) x9 * x53) + (0x2 * ((uint64_t) x11 * x51) + (0x2 * ((uint64_t) x13 * x49) + (0x2 * ((uint64_t) x15 * x47) + (0x2 * ((uint64_t) x17 * x45) + (0x2 * ((uint64_t) x19 * x43) + (0x2 * ((uint64_t) x21 * x41) + (0x2 * ((uint64_t) x23 * x39) + (0x2 * ((uint64_t) x25 * x37) + (uint64_t) x27 * x35)))))))))) + 0x9 * ((uint64_t) x29 * x62 + ((uint64_t) x31 * x63 + ((uint64_t) x33 * x61 + (uint64_t) x32 * x59)));
- uint64_t x69 = (uint64_t) x5 * x55 + (0x2 * ((uint64_t) x7 * x53) + (0x2 * ((uint64_t) x9 * x51) + (0x2 * ((uint64_t) x11 * x49) + (0x2 * ((uint64_t) x13 * x47) + (0x2 * ((uint64_t) x15 * x45) + (0x2 * ((uint64_t) x17 * x43) + (0x2 * ((uint64_t) x19 * x41) + (0x2 * ((uint64_t) x21 * x39) + (0x2 * ((uint64_t) x23 * x37) + (uint64_t) x25 * x35))))))))) + 0x9 * ((uint64_t) x27 * x62 + ((uint64_t) x29 * x63 + ((uint64_t) x31 * x61 + ((uint64_t) x33 * x59 + (uint64_t) x32 * x57))));
- uint64_t x70 = (uint64_t) x5 * x53 + (0x2 * ((uint64_t) x7 * x51) + (0x2 * ((uint64_t) x9 * x49) + (0x2 * ((uint64_t) x11 * x47) + (0x2 * ((uint64_t) x13 * x45) + (0x2 * ((uint64_t) x15 * x43) + (0x2 * ((uint64_t) x17 * x41) + (0x2 * ((uint64_t) x19 * x39) + (0x2 * ((uint64_t) x21 * x37) + (uint64_t) x23 * x35)))))))) + 0x9 * ((uint64_t) x25 * x62 + ((uint64_t) x27 * x63 + ((uint64_t) x29 * x61 + ((uint64_t) x31 * x59 + ((uint64_t) x33 * x57 + (uint64_t) x32 * x55)))));
- uint64_t x71 = (uint64_t) x5 * x51 + (0x2 * ((uint64_t) x7 * x49) + (0x2 * ((uint64_t) x9 * x47) + (0x2 * ((uint64_t) x11 * x45) + (0x2 * ((uint64_t) x13 * x43) + (0x2 * ((uint64_t) x15 * x41) + (0x2 * ((uint64_t) x17 * x39) + (0x2 * ((uint64_t) x19 * x37) + (uint64_t) x21 * x35))))))) + 0x9 * ((uint64_t) x23 * x62 + ((uint64_t) x25 * x63 + ((uint64_t) x27 * x61 + ((uint64_t) x29 * x59 + ((uint64_t) x31 * x57 + ((uint64_t) x33 * x55 + (uint64_t) x32 * x53))))));
- uint64_t x72 = (uint64_t) x5 * x49 + (0x2 * ((uint64_t) x7 * x47) + (0x2 * ((uint64_t) x9 * x45) + (0x2 * ((uint64_t) x11 * x43) + (0x2 * ((uint64_t) x13 * x41) + (0x2 * ((uint64_t) x15 * x39) + (0x2 * ((uint64_t) x17 * x37) + (uint64_t) x19 * x35)))))) + 0x9 * ((uint64_t) x21 * x62 + ((uint64_t) x23 * x63 + ((uint64_t) x25 * x61 + ((uint64_t) x27 * x59 + ((uint64_t) x29 * x57 + ((uint64_t) x31 * x55 + ((uint64_t) x33 * x53 + (uint64_t) x32 * x51)))))));
- uint64_t x73 = (uint64_t) x5 * x47 + (0x2 * ((uint64_t) x7 * x45) + (0x2 * ((uint64_t) x9 * x43) + (0x2 * ((uint64_t) x11 * x41) + (0x2 * ((uint64_t) x13 * x39) + (0x2 * ((uint64_t) x15 * x37) + (uint64_t) x17 * x35))))) + 0x9 * ((uint64_t) x19 * x62 + ((uint64_t) x21 * x63 + ((uint64_t) x23 * x61 + ((uint64_t) x25 * x59 + ((uint64_t) x27 * x57 + ((uint64_t) x29 * x55 + ((uint64_t) x31 * x53 + ((uint64_t) x33 * x51 + (uint64_t) x32 * x49))))))));
- uint64_t x74 = (uint64_t) x5 * x45 + (0x2 * ((uint64_t) x7 * x43) + (0x2 * ((uint64_t) x9 * x41) + (0x2 * ((uint64_t) x11 * x39) + (0x2 * ((uint64_t) x13 * x37) + (uint64_t) x15 * x35)))) + 0x9 * ((uint64_t) x17 * x62 + ((uint64_t) x19 * x63 + ((uint64_t) x21 * x61 + ((uint64_t) x23 * x59 + ((uint64_t) x25 * x57 + ((uint64_t) x27 * x55 + ((uint64_t) x29 * x53 + ((uint64_t) x31 * x51 + ((uint64_t) x33 * x49 + (uint64_t) x32 * x47)))))))));
- uint64_t x75 = (uint64_t) x5 * x43 + (0x2 * ((uint64_t) x7 * x41) + (0x2 * ((uint64_t) x9 * x39) + (0x2 * ((uint64_t) x11 * x37) + (uint64_t) x13 * x35))) + 0x9 * ((uint64_t) x15 * x62 + ((uint64_t) x17 * x63 + ((uint64_t) x19 * x61 + ((uint64_t) x21 * x59 + ((uint64_t) x23 * x57 + ((uint64_t) x25 * x55 + ((uint64_t) x27 * x53 + ((uint64_t) x29 * x51 + ((uint64_t) x31 * x49 + ((uint64_t) x33 * x47 + (uint64_t) x32 * x45))))))))));
- uint64_t x76 = (uint64_t) x5 * x41 + (0x2 * ((uint64_t) x7 * x39) + (0x2 * ((uint64_t) x9 * x37) + (uint64_t) x11 * x35)) + 0x9 * ((uint64_t) x13 * x62 + ((uint64_t) x15 * x63 + ((uint64_t) x17 * x61 + ((uint64_t) x19 * x59 + ((uint64_t) x21 * x57 + ((uint64_t) x23 * x55 + ((uint64_t) x25 * x53 + ((uint64_t) x27 * x51 + ((uint64_t) x29 * x49 + ((uint64_t) x31 * x47 + ((uint64_t) x33 * x45 + (uint64_t) x32 * x43)))))))))));
- uint64_t x77 = (uint64_t) x5 * x39 + (0x2 * ((uint64_t) x7 * x37) + (uint64_t) x9 * x35) + 0x9 * ((uint64_t) x11 * x62 + ((uint64_t) x13 * x63 + ((uint64_t) x15 * x61 + ((uint64_t) x17 * x59 + ((uint64_t) x19 * x57 + ((uint64_t) x21 * x55 + ((uint64_t) x23 * x53 + ((uint64_t) x25 * x51 + ((uint64_t) x27 * x49 + ((uint64_t) x29 * x47 + ((uint64_t) x31 * x45 + ((uint64_t) x33 * x43 + (uint64_t) x32 * x41))))))))))));
- uint64_t x78 = (uint64_t) x5 * x37 + (uint64_t) x7 * x35 + 0x9 * ((uint64_t) x9 * x62 + ((uint64_t) x11 * x63 + ((uint64_t) x13 * x61 + ((uint64_t) x15 * x59 + ((uint64_t) x17 * x57 + ((uint64_t) x19 * x55 + ((uint64_t) x21 * x53 + ((uint64_t) x23 * x51 + ((uint64_t) x25 * x49 + ((uint64_t) x27 * x47 + ((uint64_t) x29 * x45 + ((uint64_t) x31 * x43 + ((uint64_t) x33 * x41 + (uint64_t) x32 * x39)))))))))))));
- uint64_t x79 = (uint64_t) x5 * x35 + 0x9 * (0x2 * ((uint64_t) x7 * x62) + (0x2 * ((uint64_t) x9 * x63) + (0x2 * ((uint64_t) x11 * x61) + (0x2 * ((uint64_t) x13 * x59) + (0x2 * ((uint64_t) x15 * x57) + (0x2 * ((uint64_t) x17 * x55) + (0x2 * ((uint64_t) x19 * x53) + (0x2 * ((uint64_t) x21 * x51) + (0x2 * ((uint64_t) x23 * x49) + (0x2 * ((uint64_t) x25 * x47) + (0x2 * ((uint64_t) x27 * x45) + (0x2 * ((uint64_t) x29 * x43) + (0x2 * ((uint64_t) x31 * x41) + (0x2 * ((uint64_t) x33 * x39) + 0x2 * ((uint64_t) x32 * x37)))))))))))))));
+ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x9 * ((uint64_t)x32 * x62)));
+ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x9 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x9 * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x9 * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x9 * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x9 * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x9 * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x9 * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x9 * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x9 * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x9 * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x9 * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x9 * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x9 * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+ uint64_t x79 = (((uint64_t)x5 * x35) + (0x9 * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
uint32_t x80 = (uint32_t) (x79 >> 0x15);
- uint32_t x81 = (uint32_t) x79 & 0x1fffff;
- uint64_t x82 = x80 + x78;
+ uint32_t x81 = ((uint32_t)x79 & 0x1fffff);
+ uint64_t x82 = (x80 + x78);
uint32_t x83 = (uint32_t) (x82 >> 0x14);
- uint32_t x84 = (uint32_t) x82 & 0xfffff;
- uint64_t x85 = x83 + x77;
+ uint32_t x84 = ((uint32_t)x82 & 0xfffff);
+ uint64_t x85 = (x83 + x77);
uint32_t x86 = (uint32_t) (x85 >> 0x14);
- uint32_t x87 = (uint32_t) x85 & 0xfffff;
- uint64_t x88 = x86 + x76;
+ uint32_t x87 = ((uint32_t)x85 & 0xfffff);
+ uint64_t x88 = (x86 + x76);
uint32_t x89 = (uint32_t) (x88 >> 0x14);
- uint32_t x90 = (uint32_t) x88 & 0xfffff;
- uint64_t x91 = x89 + x75;
+ uint32_t x90 = ((uint32_t)x88 & 0xfffff);
+ uint64_t x91 = (x89 + x75);
uint32_t x92 = (uint32_t) (x91 >> 0x14);
- uint32_t x93 = (uint32_t) x91 & 0xfffff;
- uint64_t x94 = x92 + x74;
+ uint32_t x93 = ((uint32_t)x91 & 0xfffff);
+ uint64_t x94 = (x92 + x74);
uint32_t x95 = (uint32_t) (x94 >> 0x14);
- uint32_t x96 = (uint32_t) x94 & 0xfffff;
- uint64_t x97 = x95 + x73;
+ uint32_t x96 = ((uint32_t)x94 & 0xfffff);
+ uint64_t x97 = (x95 + x73);
uint32_t x98 = (uint32_t) (x97 >> 0x14);
- uint32_t x99 = (uint32_t) x97 & 0xfffff;
- uint64_t x100 = x98 + x72;
+ uint32_t x99 = ((uint32_t)x97 & 0xfffff);
+ uint64_t x100 = (x98 + x72);
uint32_t x101 = (uint32_t) (x100 >> 0x14);
- uint32_t x102 = (uint32_t) x100 & 0xfffff;
- uint64_t x103 = x101 + x71;
+ uint32_t x102 = ((uint32_t)x100 & 0xfffff);
+ uint64_t x103 = (x101 + x71);
uint32_t x104 = (uint32_t) (x103 >> 0x14);
- uint32_t x105 = (uint32_t) x103 & 0xfffff;
- uint64_t x106 = x104 + x70;
+ uint32_t x105 = ((uint32_t)x103 & 0xfffff);
+ uint64_t x106 = (x104 + x70);
uint32_t x107 = (uint32_t) (x106 >> 0x14);
- uint32_t x108 = (uint32_t) x106 & 0xfffff;
- uint64_t x109 = x107 + x69;
+ uint32_t x108 = ((uint32_t)x106 & 0xfffff);
+ uint64_t x109 = (x107 + x69);
uint32_t x110 = (uint32_t) (x109 >> 0x14);
- uint32_t x111 = (uint32_t) x109 & 0xfffff;
- uint64_t x112 = x110 + x68;
+ uint32_t x111 = ((uint32_t)x109 & 0xfffff);
+ uint64_t x112 = (x110 + x68);
uint32_t x113 = (uint32_t) (x112 >> 0x14);
- uint32_t x114 = (uint32_t) x112 & 0xfffff;
- uint64_t x115 = x113 + x67;
+ uint32_t x114 = ((uint32_t)x112 & 0xfffff);
+ uint64_t x115 = (x113 + x67);
uint32_t x116 = (uint32_t) (x115 >> 0x14);
- uint32_t x117 = (uint32_t) x115 & 0xfffff;
- uint64_t x118 = x116 + x66;
+ uint32_t x117 = ((uint32_t)x115 & 0xfffff);
+ uint64_t x118 = (x116 + x66);
uint32_t x119 = (uint32_t) (x118 >> 0x14);
- uint32_t x120 = (uint32_t) x118 & 0xfffff;
- uint64_t x121 = x119 + x65;
+ uint32_t x120 = ((uint32_t)x118 & 0xfffff);
+ uint64_t x121 = (x119 + x65);
uint32_t x122 = (uint32_t) (x121 >> 0x14);
- uint32_t x123 = (uint32_t) x121 & 0xfffff;
- uint64_t x124 = x122 + x64;
+ uint32_t x123 = ((uint32_t)x121 & 0xfffff);
+ uint64_t x124 = (x122 + x64);
uint32_t x125 = (uint32_t) (x124 >> 0x14);
- uint32_t x126 = (uint32_t) x124 & 0xfffff;
- uint32_t x127 = x81 + 0x9 * x125;
- uint32_t x128 = x127 >> 0x15;
- uint32_t x129 = x127 & 0x1fffff;
- uint32_t x130 = x128 + x84;
- uint32_t x131 = x130 >> 0x14;
- uint32_t x132 = x130 & 0xfffff;
- return (Return x126, Return x123, Return x120, Return x117, Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, x131 + x87, Return x132, Return x129))
+ uint32_t x126 = ((uint32_t)x124 & 0xfffff);
+ uint32_t x127 = (x81 + (0x9 * x125));
+ uint32_t x128 = (x127 >> 0x15);
+ uint32_t x129 = (x127 & 0x1fffff);
+ uint32_t x130 = (x128 + x84);
+ uint32_t x131 = (x130 >> 0x14);
+ uint32_t x132 = (x130 & 0xfffff);
+ return (Return x126, Return x123, Return x120, Return x117, Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, (x131 + x87), Return x132, Return x129))
(x, x0)%core
: word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e321m9/fesquare.c b/src/Specific/solinas32_2e321m9/fesquare.c
new file mode 100644
index 000000000..9cfed4a93
--- /dev/null
+++ b/src/Specific/solinas32_2e321m9/fesquare.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer built with the GCC mode(TI) attribute; not referenced by fesquare below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GNU compilers only; clang and ICC are explicitly excluded
+// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294: route the _subborrow_* names to GCC's own builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition so the redefinition below is clean
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 16-limb value passed as x29..x2 (every product below pairs two of these inputs) and stores 16 result limbs in out[0..15]. NOTE(review): the directory name suggests arithmetic mod 2^321 - 9 — confirm.
+{ uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2)))))))))))))))); // x31..x46: uint64_t sums of pairwise input products; some terms are doubled (0x2) and some are scaled by 0x9
+{ uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x9 * ((uint64_t)x29 * x29)));
+{ uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x9 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+{ uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x9 * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
+{ uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x9 * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
+{ uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x9 * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+{ uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x9 * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+{ uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x9 * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+{ uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x9 * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+{ uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x9 * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+{ uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x9 * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+{ uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x9 * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
+{ uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x9 * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
+{ uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x9 * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+{ uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x9 * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+{ uint64_t x46 = (((uint64_t)x2 * x2) + (0x9 * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4)))))))))))))))))); // x46 = x2*x2 + 0x9 * (doubled cross terms); it is the first accumulator fed to the carry chain
+{ uint32_t x47 = (uint32_t) (x46 >> 0x15); // carry chain starts here: bits of x46 above the low 21 ...
+{ uint32_t x48 = ((uint32_t)x46 & 0x1fffff); // ... and the low 21 bits kept as the first limb
+{ uint64_t x49 = (x47 + x45); // each step adds the previous carry to the next accumulator
+{ uint32_t x50 = (uint32_t) (x49 >> 0x14); // every later split is at 20 bits (shift 0x14, mask 0xfffff)
+{ uint32_t x51 = ((uint32_t)x49 & 0xfffff);
+{ uint64_t x52 = (x50 + x44);
+{ uint32_t x53 = (uint32_t) (x52 >> 0x14);
+{ uint32_t x54 = ((uint32_t)x52 & 0xfffff);
+{ uint64_t x55 = (x53 + x43);
+{ uint32_t x56 = (uint32_t) (x55 >> 0x14);
+{ uint32_t x57 = ((uint32_t)x55 & 0xfffff);
+{ uint64_t x58 = (x56 + x42);
+{ uint32_t x59 = (uint32_t) (x58 >> 0x14);
+{ uint32_t x60 = ((uint32_t)x58 & 0xfffff);
+{ uint64_t x61 = (x59 + x41);
+{ uint32_t x62 = (uint32_t) (x61 >> 0x14);
+{ uint32_t x63 = ((uint32_t)x61 & 0xfffff);
+{ uint64_t x64 = (x62 + x40);
+{ uint32_t x65 = (uint32_t) (x64 >> 0x14);
+{ uint32_t x66 = ((uint32_t)x64 & 0xfffff);
+{ uint64_t x67 = (x65 + x39);
+{ uint32_t x68 = (uint32_t) (x67 >> 0x14);
+{ uint32_t x69 = ((uint32_t)x67 & 0xfffff);
+{ uint64_t x70 = (x68 + x38);
+{ uint32_t x71 = (uint32_t) (x70 >> 0x14);
+{ uint32_t x72 = ((uint32_t)x70 & 0xfffff);
+{ uint64_t x73 = (x71 + x37);
+{ uint32_t x74 = (uint32_t) (x73 >> 0x14);
+{ uint32_t x75 = ((uint32_t)x73 & 0xfffff);
+{ uint64_t x76 = (x74 + x36);
+{ uint32_t x77 = (uint32_t) (x76 >> 0x14);
+{ uint32_t x78 = ((uint32_t)x76 & 0xfffff);
+{ uint64_t x79 = (x77 + x35);
+{ uint32_t x80 = (uint32_t) (x79 >> 0x14);
+{ uint32_t x81 = ((uint32_t)x79 & 0xfffff);
+{ uint64_t x82 = (x80 + x34);
+{ uint32_t x83 = (uint32_t) (x82 >> 0x14);
+{ uint32_t x84 = ((uint32_t)x82 & 0xfffff);
+{ uint64_t x85 = (x83 + x33);
+{ uint32_t x86 = (uint32_t) (x85 >> 0x14);
+{ uint32_t x87 = ((uint32_t)x85 & 0xfffff);
+{ uint64_t x88 = (x86 + x32);
+{ uint32_t x89 = (uint32_t) (x88 >> 0x14);
+{ uint32_t x90 = ((uint32_t)x88 & 0xfffff);
+{ uint64_t x91 = (x89 + x31);
+{ uint32_t x92 = (uint32_t) (x91 >> 0x14);
+{ uint32_t x93 = ((uint32_t)x91 & 0xfffff);
+{ uint32_t x94 = (x48 + (0x9 * x92)); // the carry out of the last accumulator is scaled by 0x9 and added back onto the first limb
+{ uint32_t x95 = (x94 >> 0x15);
+{ uint32_t x96 = (x94 & 0x1fffff);
+{ uint32_t x97 = (x95 + x51); // one more carry step into the second limb
+{ uint32_t x98 = (x97 >> 0x14);
+{ uint32_t x99 = (x97 & 0xfffff);
+out[0] = x93; // out[] is filled from the last-carried limb (x93) down to the first limb (x96)
+out[1] = x90;
+out[2] = x87;
+out[3] = x84;
+out[4] = x81;
+out[5] = x78;
+out[6] = x75;
+out[7] = x72;
+out[8] = x69;
+out[9] = x66;
+out[10] = x63;
+out[11] = x60;
+out[12] = x57;
+out[13] = x98 + x54; // the remaining carry x98 is added to the third limb without a further split
+out[14] = x99;
+out[15] = x96;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e321m9/fesquare.h b/src/Specific/solinas32_2e321m9/fesquare.h
new file mode 100644
index 000000000..c86247b3d
--- /dev/null
+++ b/src/Specific/solinas32_2e321m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for fesquare: out is the result buffer, x29..x2 are the 16 limb inputs passed by value. The definition lives in fesquare.c.
diff --git a/src/Specific/solinas32_2e321m9/freeze.c b/src/Specific/solinas32_2e321m9/freeze.c
new file mode 100644
index 000000000..d44bc9b83
--- /dev/null
+++ b/src/Specific/solinas32_2e321m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer built with the GCC mode(TI) attribute; not referenced by freeze below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GNU compilers only; clang and ICC are explicitly excluded
+// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294: route the _subborrow_* names to GCC's own builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition so the redefinition below is clean
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is NOT valid C — no opening brace follows this signature, so this file cannot compile as written.
+out[0] = uint32_t x32; // NOTE(review): a declaration is used as the right-hand side; x32 is never given a value
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 21 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C; it looks like an untranslated intermediate-syntax term left in the emitted text — confirm against the code generator
+out[2] = x2; // only x2 of the 16 value parameters is referenced in this body
+out[3] = 0x1ffff7;; // stray second semicolon
+} // NOTE(review): closing brace has no matching opener; regenerate this file rather than hand-editing it
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e321m9/freeze.h b/src/Specific/solinas32_2e321m9/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e321m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for freeze: out is the result buffer, x29..x2 are 16 limb inputs passed by value. NOTE(review): the matching definition in freeze.c is malformed (not valid C).
diff --git a/src/Specific/solinas32_2e322m2e161m1/femul.c b/src/Specific/solinas32_2e322m2e161m1/femul.c
new file mode 100644
index 000000000..7cfe5c54c
--- /dev/null
+++ b/src/Specific/solinas32_2e322m2e161m1/femul.c
@@ -0,0 +1,119 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer built with the GCC mode(TI) attribute; not referenced by femul below
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GNU compilers only; clang and ICC are explicitly excluded
+// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294: route the _subborrow_* names to GCC's own builtins.
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // drop any earlier definition so the redefinition below is clean
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31) // Multiplies two 14-limb operands (first operand x28..x5, second operand x54..x31; every product below pairs one term from each) and stores 14 result limbs in out[0..13]. NOTE(review): the directory name suggests arithmetic mod 2^322 - 2^161 - 1 — confirm.
+{ uint64_t x56 = (((uint64_t)(x17 + x28) * (x43 + x54)) - ((uint64_t)x17 * x43)); // x56..x68: products of pairwise limb sums minus the products of the first summands; presumably a Karatsuba-style split — TODO confirm
+{ uint64_t x57 = ((((uint64_t)(x15 + x29) * (x43 + x54)) + ((uint64_t)(x17 + x28) * (x41 + x55))) - (((uint64_t)x15 * x43) + ((uint64_t)x17 * x41)));
+{ uint64_t x58 = ((((uint64_t)(x13 + x27) * (x43 + x54)) + (((uint64_t)(x15 + x29) * (x41 + x55)) + ((uint64_t)(x17 + x28) * (x39 + x53)))) - (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + ((uint64_t)x17 * x39))));
+{ uint64_t x59 = ((((uint64_t)(x11 + x25) * (x43 + x54)) + (((uint64_t)(x13 + x27) * (x41 + x55)) + (((uint64_t)(x15 + x29) * (x39 + x53)) + ((uint64_t)(x17 + x28) * (x37 + x51))))) - (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + ((uint64_t)x17 * x37)))));
+{ uint64_t x60 = ((((uint64_t)(x9 + x23) * (x43 + x54)) + (((uint64_t)(x11 + x25) * (x41 + x55)) + (((uint64_t)(x13 + x27) * (x39 + x53)) + (((uint64_t)(x15 + x29) * (x37 + x51)) + ((uint64_t)(x17 + x28) * (x35 + x49)))))) - (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))));
+{ uint64_t x61 = ((((uint64_t)(x7 + x21) * (x43 + x54)) + (((uint64_t)(x9 + x23) * (x41 + x55)) + (((uint64_t)(x11 + x25) * (x39 + x53)) + (((uint64_t)(x13 + x27) * (x37 + x51)) + (((uint64_t)(x15 + x29) * (x35 + x49)) + ((uint64_t)(x17 + x28) * (x33 + x47))))))) - (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33)))))));
+{ uint64_t x62 = ((((uint64_t)(x5 + x19) * (x43 + x54)) + (((uint64_t)(x7 + x21) * (x41 + x55)) + (((uint64_t)(x9 + x23) * (x39 + x53)) + (((uint64_t)(x11 + x25) * (x37 + x51)) + (((uint64_t)(x13 + x27) * (x35 + x49)) + (((uint64_t)(x15 + x29) * (x33 + x47)) + ((uint64_t)(x17 + x28) * (x31 + x45)))))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))));
+{ uint64_t x63 = ((((uint64_t)(x5 + x19) * (x41 + x55)) + (((uint64_t)(x7 + x21) * (x39 + x53)) + (((uint64_t)(x9 + x23) * (x37 + x51)) + (((uint64_t)(x11 + x25) * (x35 + x49)) + (((uint64_t)(x13 + x27) * (x33 + x47)) + ((uint64_t)(x15 + x29) * (x31 + x45))))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))));
+{ uint64_t x64 = ((((uint64_t)(x5 + x19) * (x39 + x53)) + (((uint64_t)(x7 + x21) * (x37 + x51)) + (((uint64_t)(x9 + x23) * (x35 + x49)) + (((uint64_t)(x11 + x25) * (x33 + x47)) + ((uint64_t)(x13 + x27) * (x31 + x45)))))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))));
+{ uint64_t x65 = ((((uint64_t)(x5 + x19) * (x37 + x51)) + (((uint64_t)(x7 + x21) * (x35 + x49)) + (((uint64_t)(x9 + x23) * (x33 + x47)) + ((uint64_t)(x11 + x25) * (x31 + x45))))) - (((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))));
+{ uint64_t x66 = ((((uint64_t)(x5 + x19) * (x35 + x49)) + (((uint64_t)(x7 + x21) * (x33 + x47)) + ((uint64_t)(x9 + x23) * (x31 + x45)))) - (((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))));
+{ uint64_t x67 = ((((uint64_t)(x5 + x19) * (x33 + x47)) + ((uint64_t)(x7 + x21) * (x31 + x45))) - (((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)));
+{ uint64_t x68 = (((uint64_t)(x5 + x19) * (x31 + x45)) - ((uint64_t)x5 * x31));
+{ uint64_t x69 = (((((uint64_t)x17 * x43) + ((uint64_t)x28 * x54)) + x63) + x56); // x69..x81: direct limb products combined with the x56..x68 terms computed above
+{ uint64_t x70 = ((((((uint64_t)x15 * x43) + ((uint64_t)x17 * x41)) + (((uint64_t)x29 * x54) + ((uint64_t)x28 * x55))) + x64) + x57);
+{ uint64_t x71 = ((((((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + ((uint64_t)x17 * x39))) + (((uint64_t)x27 * x54) + (((uint64_t)x29 * x55) + ((uint64_t)x28 * x53)))) + x65) + x58);
+{ uint64_t x72 = ((((((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + ((uint64_t)x17 * x37)))) + (((uint64_t)x25 * x54) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x28 * x51))))) + x66) + x59);
+{ uint64_t x73 = ((((((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))) + (((uint64_t)x23 * x54) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + ((uint64_t)x28 * x49)))))) + x67) + x60);
+{ uint64_t x74 = ((((((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + ((uint64_t)x17 * x33)))))) + (((uint64_t)x21 * x54) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((uint64_t)x28 * x47))))))) + x68) + x61);
+{ uint64_t x75 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))) + (((uint64_t)x19 * x54) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((uint64_t)x28 * x45))))))));
+{ uint64_t x76 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + ((uint64_t)x29 * x45))))))) + x56);
+{ uint64_t x77 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + ((uint64_t)x27 * x45)))))) + x57);
+{ uint64_t x78 = (((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + ((uint64_t)x25 * x45))))) + x58);
+{ uint64_t x79 = (((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + ((uint64_t)x23 * x45)))) + x59);
+{ uint64_t x80 = (((((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)) + (((uint64_t)x19 * x47) + ((uint64_t)x21 * x45))) + x60);
+{ uint64_t x81 = ((((uint64_t)x5 * x31) + ((uint64_t)x19 * x45)) + x61);
+{ uint32_t x82 = (uint32_t) (x75 >> 0x17); // carry handling starts here; every split is at 23 bits (shift 0x17, mask 0x7fffff)
+{ uint32_t x83 = ((uint32_t)x75 & 0x7fffff);
+{ uint32_t x84 = (uint32_t) (x62 >> 0x17);
+{ uint32_t x85 = ((uint32_t)x62 & 0x7fffff);
+{ uint64_t x86 = (((uint64_t)0x800000 * x84) + x85); // rebuilds a value from the two halves of x62 before splitting it again
+{ uint32_t x87 = (uint32_t) (x86 >> 0x17);
+{ uint32_t x88 = ((uint32_t)x86 & 0x7fffff);
+{ uint64_t x89 = ((x82 + x74) + x87); // x87 is injected into two separate carry chains: this one (through x74) ...
+{ uint32_t x90 = (uint32_t) (x89 >> 0x17);
+{ uint32_t x91 = ((uint32_t)x89 & 0x7fffff);
+{ uint64_t x92 = (x81 + x87); // ... and this one (through x81); the two chains are interleaved below
+{ uint32_t x93 = (uint32_t) (x92 >> 0x17);
+{ uint32_t x94 = ((uint32_t)x92 & 0x7fffff);
+{ uint64_t x95 = (x90 + x73);
+{ uint32_t x96 = (uint32_t) (x95 >> 0x17);
+{ uint32_t x97 = ((uint32_t)x95 & 0x7fffff);
+{ uint64_t x98 = (x93 + x80);
+{ uint32_t x99 = (uint32_t) (x98 >> 0x17);
+{ uint32_t x100 = ((uint32_t)x98 & 0x7fffff);
+{ uint64_t x101 = (x96 + x72);
+{ uint32_t x102 = (uint32_t) (x101 >> 0x17);
+{ uint32_t x103 = ((uint32_t)x101 & 0x7fffff);
+{ uint64_t x104 = (x99 + x79);
+{ uint32_t x105 = (uint32_t) (x104 >> 0x17);
+{ uint32_t x106 = ((uint32_t)x104 & 0x7fffff);
+{ uint64_t x107 = (x102 + x71);
+{ uint32_t x108 = (uint32_t) (x107 >> 0x17);
+{ uint32_t x109 = ((uint32_t)x107 & 0x7fffff);
+{ uint64_t x110 = (x105 + x78);
+{ uint32_t x111 = (uint32_t) (x110 >> 0x17);
+{ uint32_t x112 = ((uint32_t)x110 & 0x7fffff);
+{ uint64_t x113 = (x108 + x70);
+{ uint32_t x114 = (uint32_t) (x113 >> 0x17);
+{ uint32_t x115 = ((uint32_t)x113 & 0x7fffff);
+{ uint64_t x116 = (x111 + x77);
+{ uint32_t x117 = (uint32_t) (x116 >> 0x17);
+{ uint32_t x118 = ((uint32_t)x116 & 0x7fffff);
+{ uint64_t x119 = (x114 + x69);
+{ uint32_t x120 = (uint32_t) (x119 >> 0x17);
+{ uint32_t x121 = ((uint32_t)x119 & 0x7fffff);
+{ uint64_t x122 = (x117 + x76);
+{ uint32_t x123 = (uint32_t) (x122 >> 0x17);
+{ uint32_t x124 = ((uint32_t)x122 & 0x7fffff);
+{ uint32_t x125 = (x120 + x88); // the ends of both chains are folded onto the limbs split off at the start (x88, x83)
+{ uint32_t x126 = (x125 >> 0x17);
+{ uint32_t x127 = (x125 & 0x7fffff);
+{ uint32_t x128 = (x123 + x83);
+{ uint32_t x129 = (x128 >> 0x17);
+{ uint32_t x130 = (x128 & 0x7fffff);
+{ uint32_t x131 = ((0x800000 * x126) + x127); // same rebuild-then-split pattern as x86, now in 32 bits
+{ uint32_t x132 = (x131 >> 0x17);
+{ uint32_t x133 = (x131 & 0x7fffff);
+{ uint32_t x134 = ((x129 + x91) + x132); // x132 is again added into two places: here and into x137
+{ uint32_t x135 = (x134 >> 0x17);
+{ uint32_t x136 = (x134 & 0x7fffff);
+{ uint32_t x137 = (x94 + x132);
+{ uint32_t x138 = (x137 >> 0x17);
+{ uint32_t x139 = (x137 & 0x7fffff);
+out[0] = x133;
+out[1] = x121;
+out[2] = x115;
+out[3] = x109;
+out[4] = x103;
+out[5] = x135 + x97; // leftover carry x135 is added to limb x97 without a further split
+out[6] = x136;
+out[7] = x130;
+out[8] = x124;
+out[9] = x118;
+out[10] = x112;
+out[11] = x106;
+out[12] = x138 + x100; // leftover carry x138 is added to limb x100 without a further split
+out[13] = x139;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[14];
diff --git a/src/Specific/solinas32_2e322m2e161m1/femul.h b/src/Specific/solinas32_2e322m2e161m1/femul.h
new file mode 100644
index 000000000..6fdc2d29d
--- /dev/null
+++ b/src/Specific/solinas32_2e322m2e161m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31); // Prototype for femul: out is the result buffer, x28..x5 are the 14 limbs of the first operand and x54..x31 the 14 limbs of the second, all passed by value. The definition lives in femul.c.
diff --git a/src/Specific/solinas32_2e322m2e161m1/fesquare.c b/src/Specific/solinas32_2e322m2e161m1/fesquare.c
new file mode 100644
index 000000000..d745c603b
--- /dev/null
+++ b/src/Specific/solinas32_2e322m2e161m1/fesquare.c
@@ -0,0 +1,119 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: computes 14 limbs from the 14 by-value limb arguments and stores them in out[0..13]; all products are formed in uint64_t. NOTE(review): the directory name suggests squaring modulo 2^322 - 2^161 - 1 with 32-bit-sized inputs -- confirm against the generator.
+void force_inline fesquare(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x27 = (((uint64_t)(x14 + x25) * (x14 + x25)) - ((uint64_t)x14 * x14)); // x27..x39: product columns of (A+B)*(A+B) minus the matching columns of A*A, with A = (x2,x4,x6,x8,x10,x12,x14) and B = (x16,x18,x20,x22,x24,x26,x25)
+{ uint64_t x28 = ((((uint64_t)(x12 + x26) * (x14 + x25)) + ((uint64_t)(x14 + x25) * (x12 + x26))) - (((uint64_t)x12 * x14) + ((uint64_t)x14 * x12)));
+{ uint64_t x29 = ((((uint64_t)(x10 + x24) * (x14 + x25)) + (((uint64_t)(x12 + x26) * (x12 + x26)) + ((uint64_t)(x14 + x25) * (x10 + x24)))) - (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10))));
+{ uint64_t x30 = ((((uint64_t)(x8 + x22) * (x14 + x25)) + (((uint64_t)(x10 + x24) * (x12 + x26)) + (((uint64_t)(x12 + x26) * (x10 + x24)) + ((uint64_t)(x14 + x25) * (x8 + x22))))) - (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x14 * x8)))));
+{ uint64_t x31 = ((((uint64_t)(x6 + x20) * (x14 + x25)) + (((uint64_t)(x8 + x22) * (x12 + x26)) + (((uint64_t)(x10 + x24) * (x10 + x24)) + (((uint64_t)(x12 + x26) * (x8 + x22)) + ((uint64_t)(x14 + x25) * (x6 + x20)))))) - (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x14 * x6))))));
+{ uint64_t x32 = ((((uint64_t)(x4 + x18) * (x14 + x25)) + (((uint64_t)(x6 + x20) * (x12 + x26)) + (((uint64_t)(x8 + x22) * (x10 + x24)) + (((uint64_t)(x10 + x24) * (x8 + x22)) + (((uint64_t)(x12 + x26) * (x6 + x20)) + ((uint64_t)(x14 + x25) * (x4 + x18))))))) - (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x14 * x4)))))));
+{ uint64_t x33 = ((((uint64_t)(x2 + x16) * (x14 + x25)) + (((uint64_t)(x4 + x18) * (x12 + x26)) + (((uint64_t)(x6 + x20) * (x10 + x24)) + (((uint64_t)(x8 + x22) * (x8 + x22)) + (((uint64_t)(x10 + x24) * (x6 + x20)) + (((uint64_t)(x12 + x26) * (x4 + x18)) + ((uint64_t)(x14 + x25) * (x2 + x16)))))))) - (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))));
+{ uint64_t x34 = ((((uint64_t)(x2 + x16) * (x12 + x26)) + (((uint64_t)(x4 + x18) * (x10 + x24)) + (((uint64_t)(x6 + x20) * (x8 + x22)) + (((uint64_t)(x8 + x22) * (x6 + x20)) + (((uint64_t)(x10 + x24) * (x4 + x18)) + ((uint64_t)(x12 + x26) * (x2 + x16))))))) - (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))));
+{ uint64_t x35 = ((((uint64_t)(x2 + x16) * (x10 + x24)) + (((uint64_t)(x4 + x18) * (x8 + x22)) + (((uint64_t)(x6 + x20) * (x6 + x20)) + (((uint64_t)(x8 + x22) * (x4 + x18)) + ((uint64_t)(x10 + x24) * (x2 + x16)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
+{ uint64_t x36 = ((((uint64_t)(x2 + x16) * (x8 + x22)) + (((uint64_t)(x4 + x18) * (x6 + x20)) + (((uint64_t)(x6 + x20) * (x4 + x18)) + ((uint64_t)(x8 + x22) * (x2 + x16))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+{ uint64_t x37 = ((((uint64_t)(x2 + x16) * (x6 + x20)) + (((uint64_t)(x4 + x18) * (x4 + x18)) + ((uint64_t)(x6 + x20) * (x2 + x16)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+{ uint64_t x38 = ((((uint64_t)(x2 + x16) * (x4 + x18)) + ((uint64_t)(x4 + x18) * (x2 + x16))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+{ uint64_t x39 = (((uint64_t)(x2 + x16) * (x2 + x16)) - ((uint64_t)x2 * x2));
+{ uint64_t x40 = (((((uint64_t)x14 * x14) + ((uint64_t)x25 * x25)) + x34) + x27); // x40..x52: a column of A*A plus the matching column of B*B, plus selected x27..x39 terms (x46 adds none)
+{ uint64_t x41 = ((((((uint64_t)x12 * x14) + ((uint64_t)x14 * x12)) + (((uint64_t)x26 * x25) + ((uint64_t)x25 * x26))) + x35) + x28);
+{ uint64_t x42 = ((((((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + ((uint64_t)x14 * x10))) + (((uint64_t)x24 * x25) + (((uint64_t)x26 * x26) + ((uint64_t)x25 * x24)))) + x36) + x29);
+{ uint64_t x43 = ((((((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + ((uint64_t)x14 * x8)))) + (((uint64_t)x22 * x25) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((uint64_t)x25 * x22))))) + x37) + x30);
+{ uint64_t x44 = ((((((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + ((uint64_t)x14 * x6))))) + (((uint64_t)x20 * x25) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((uint64_t)x25 * x20)))))) + x38) + x31);
+{ uint64_t x45 = ((((((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + ((uint64_t)x14 * x4)))))) + (((uint64_t)x18 * x25) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x25 * x18))))))) + x39) + x32);
+{ uint64_t x46 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x16 * x25) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + ((uint64_t)x25 * x16))))))));
+{ uint64_t x47 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + ((uint64_t)x26 * x16))))))) + x27);
+{ uint64_t x48 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + ((uint64_t)x24 * x16)))))) + x28);
+{ uint64_t x49 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + ((uint64_t)x22 * x16))))) + x29);
+{ uint64_t x50 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + ((uint64_t)x20 * x16)))) + x30);
+{ uint64_t x51 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x16 * x18) + ((uint64_t)x18 * x16))) + x31);
+{ uint64_t x52 = ((((uint64_t)x2 * x2) + ((uint64_t)x16 * x16)) + x32);
+{ uint32_t x53 = (uint32_t) (x46 >> 0x17); // from here on: each value is split into a carry (>> 0x17, i.e. 23 bits) and a 23-bit limb (& 0x7fffff); the carry is added into a later value
+{ uint32_t x54 = ((uint32_t)x46 & 0x7fffff);
+{ uint32_t x55 = (uint32_t) (x33 >> 0x17);
+{ uint32_t x56 = ((uint32_t)x33 & 0x7fffff);
+{ uint64_t x57 = (((uint64_t)0x800000 * x55) + x56); // 0x800000 = 1 << 23: puts x55 (carry part of x33) back above x56 (limb part of x33)
+{ uint32_t x58 = (uint32_t) (x57 >> 0x17);
+{ uint32_t x59 = ((uint32_t)x57 & 0x7fffff);
+{ uint64_t x60 = ((x53 + x45) + x58);
+{ uint32_t x61 = (uint32_t) (x60 >> 0x17);
+{ uint32_t x62 = ((uint32_t)x60 & 0x7fffff);
+{ uint64_t x63 = (x52 + x58);
+{ uint32_t x64 = (uint32_t) (x63 >> 0x17);
+{ uint32_t x65 = ((uint32_t)x63 & 0x7fffff);
+{ uint64_t x66 = (x61 + x44);
+{ uint32_t x67 = (uint32_t) (x66 >> 0x17);
+{ uint32_t x68 = ((uint32_t)x66 & 0x7fffff);
+{ uint64_t x69 = (x64 + x51);
+{ uint32_t x70 = (uint32_t) (x69 >> 0x17);
+{ uint32_t x71 = ((uint32_t)x69 & 0x7fffff);
+{ uint64_t x72 = (x67 + x43);
+{ uint32_t x73 = (uint32_t) (x72 >> 0x17);
+{ uint32_t x74 = ((uint32_t)x72 & 0x7fffff);
+{ uint64_t x75 = (x70 + x50);
+{ uint32_t x76 = (uint32_t) (x75 >> 0x17);
+{ uint32_t x77 = ((uint32_t)x75 & 0x7fffff);
+{ uint64_t x78 = (x73 + x42);
+{ uint32_t x79 = (uint32_t) (x78 >> 0x17);
+{ uint32_t x80 = ((uint32_t)x78 & 0x7fffff);
+{ uint64_t x81 = (x76 + x49);
+{ uint32_t x82 = (uint32_t) (x81 >> 0x17);
+{ uint32_t x83 = ((uint32_t)x81 & 0x7fffff);
+{ uint64_t x84 = (x79 + x41);
+{ uint32_t x85 = (uint32_t) (x84 >> 0x17);
+{ uint32_t x86 = ((uint32_t)x84 & 0x7fffff);
+{ uint64_t x87 = (x82 + x48);
+{ uint32_t x88 = (uint32_t) (x87 >> 0x17);
+{ uint32_t x89 = ((uint32_t)x87 & 0x7fffff);
+{ uint64_t x90 = (x85 + x40);
+{ uint32_t x91 = (uint32_t) (x90 >> 0x17);
+{ uint32_t x92 = ((uint32_t)x90 & 0x7fffff);
+{ uint64_t x93 = (x88 + x47);
+{ uint32_t x94 = (uint32_t) (x93 >> 0x17);
+{ uint32_t x95 = ((uint32_t)x93 & 0x7fffff);
+{ uint32_t x96 = (x91 + x59);
+{ uint32_t x97 = (x96 >> 0x17);
+{ uint32_t x98 = (x96 & 0x7fffff);
+{ uint32_t x99 = (x94 + x54);
+{ uint32_t x100 = (x99 >> 0x17);
+{ uint32_t x101 = (x99 & 0x7fffff);
+{ uint32_t x102 = ((0x800000 * x97) + x98);
+{ uint32_t x103 = (x102 >> 0x17);
+{ uint32_t x104 = (x102 & 0x7fffff);
+{ uint32_t x105 = ((x100 + x62) + x103);
+{ uint32_t x106 = (x105 >> 0x17);
+{ uint32_t x107 = (x105 & 0x7fffff);
+{ uint32_t x108 = (x65 + x103);
+{ uint32_t x109 = (x108 >> 0x17);
+{ uint32_t x110 = (x108 & 0x7fffff);
+out[0] = x104;
+out[1] = x92;
+out[2] = x86;
+out[3] = x80;
+out[4] = x74;
+out[5] = x106 + x68; // final carry x106 is added to the masked limb x68 without re-masking
+out[6] = x107;
+out[7] = x101;
+out[8] = x95;
+out[9] = x89;
+out[10] = x83;
+out[11] = x77;
+out[12] = x109 + x71; // final carry x109 is added to the masked limb x71 without re-masking
+out[13] = x110;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[14];
diff --git a/src/Specific/solinas32_2e322m2e161m1/fesquare.h b/src/Specific/solinas32_2e322m2e161m1/fesquare.h
new file mode 100644
index 000000000..5c03beeab
--- /dev/null
+++ b/src/Specific/solinas32_2e322m2e161m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for fesquare: the 14 uint64_t limb arguments are passed by value and the result is written through out (fesquare.c notes the caller provides uint64_t out[14]).
+void force_inline fesquare(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e322m2e161m1/freeze.c b/src/Specific/solinas32_2e322m2e161m1/freeze.c
new file mode 100644
index 000000000..b157048ca
--- /dev/null
+++ b/src/Specific/solinas32_2e322m2e161m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): the body of freeze below is not valid C -- there is no opening brace after the signature, declarations appear in expression position, and a raw "Op Syntax.SubWithGetBorrow ..." term is left in. It looks like incomplete generator output and presumably needs to be regenerated; confirm before building this file.
+void force_inline freeze(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x28;
+out[1] = uint8_t x29 = Op Syntax.SubWithGetBorrow 23 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x7fffff;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e322m2e161m1/freeze.h b/src/Specific/solinas32_2e322m2e161m1/freeze.h
new file mode 100644
index 000000000..0fda81e7a
--- /dev/null
+++ b/src/Specific/solinas32_2e322m2e161m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for freeze: 14 uint64_t limb arguments by value, result written through out. NOTE(review): the freeze.c added next to this header does not contain a valid C definition of this function.
+void force_inline freeze(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e336m17/femul.c b/src/Specific/solinas32_2e336m17/femul.c
new file mode 100644
index 000000000..c7117de97
--- /dev/null
+++ b/src/Specific/solinas32_2e336m17/femul.c
@@ -0,0 +1,96 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: computes 14 limbs from two 14-limb operands passed by value (x28,x29,x27,...,x5 and x54,x55,x53,...,x31) and stores them in out[0..13]; all products are formed in uint64_t. NOTE(review): the directory name suggests multiplication modulo 2^336 - 17 with 32-bit-sized inputs -- confirm against the generator.
+void force_inline femul(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31)
+{ uint64_t x56 = (((uint64_t)x5 * x54) + (((uint64_t)x7 * x55) + (((uint64_t)x9 * x53) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + (((uint64_t)x29 * x33) + ((uint64_t)x28 * x31)))))))))))))); // x56..x69: 14 column sums of limb products; x57..x69 also contain a group of products scaled by 0x11 (17)
+{ uint64_t x57 = ((((uint64_t)x5 * x55) + (((uint64_t)x7 * x53) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + (((uint64_t)x27 * x33) + ((uint64_t)x29 * x31))))))))))))) + (0x11 * ((uint64_t)x28 * x54)));
+{ uint64_t x58 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x27 * x31)))))))))))) + (0x11 * (((uint64_t)x29 * x54) + ((uint64_t)x28 * x55))));
+{ uint64_t x59 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + ((uint64_t)x25 * x31))))))))))) + (0x11 * (((uint64_t)x27 * x54) + (((uint64_t)x29 * x55) + ((uint64_t)x28 * x53)))));
+{ uint64_t x60 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + ((uint64_t)x23 * x31)))))))))) + (0x11 * (((uint64_t)x25 * x54) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x28 * x51))))));
+{ uint64_t x61 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + ((uint64_t)x21 * x31))))))))) + (0x11 * (((uint64_t)x23 * x54) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + ((uint64_t)x28 * x49)))))));
+{ uint64_t x62 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + ((uint64_t)x19 * x31)))))))) + (0x11 * (((uint64_t)x21 * x54) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((uint64_t)x28 * x47))))))));
+{ uint64_t x63 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + ((uint64_t)x17 * x31))))))) + (0x11 * (((uint64_t)x19 * x54) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((uint64_t)x28 * x45)))))))));
+{ uint64_t x64 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + ((uint64_t)x15 * x31)))))) + (0x11 * (((uint64_t)x17 * x54) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + ((uint64_t)x28 * x43))))))))));
+{ uint64_t x65 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + ((uint64_t)x13 * x31))))) + (0x11 * (((uint64_t)x15 * x54) + (((uint64_t)x17 * x55) + (((uint64_t)x19 * x53) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + ((uint64_t)x28 * x41)))))))))));
+{ uint64_t x66 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + ((uint64_t)x11 * x31)))) + (0x11 * (((uint64_t)x13 * x54) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + ((uint64_t)x28 * x39))))))))))));
+{ uint64_t x67 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + ((uint64_t)x9 * x31))) + (0x11 * (((uint64_t)x11 * x54) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + ((uint64_t)x28 * x37)))))))))))));
+{ uint64_t x68 = ((((uint64_t)x5 * x33) + ((uint64_t)x7 * x31)) + (0x11 * (((uint64_t)x9 * x54) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x28 * x35))))))))))))));
+{ uint64_t x69 = (((uint64_t)x5 * x31) + (0x11 * (((uint64_t)x7 * x54) + (((uint64_t)x9 * x55) + (((uint64_t)x11 * x53) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + (((uint64_t)x29 * x35) + ((uint64_t)x28 * x33)))))))))))))));
+{ uint64_t x70 = (x69 >> 0x18); // carry chain from x69 up to x56: >> 0x18 extracts the carry above 24 bits, & 0xffffff keeps the 24-bit limb
+{ uint32_t x71 = ((uint32_t)x69 & 0xffffff);
+{ uint64_t x72 = (x70 + x68);
+{ uint64_t x73 = (x72 >> 0x18);
+{ uint32_t x74 = ((uint32_t)x72 & 0xffffff);
+{ uint64_t x75 = (x73 + x67);
+{ uint64_t x76 = (x75 >> 0x18);
+{ uint32_t x77 = ((uint32_t)x75 & 0xffffff);
+{ uint64_t x78 = (x76 + x66);
+{ uint64_t x79 = (x78 >> 0x18);
+{ uint32_t x80 = ((uint32_t)x78 & 0xffffff);
+{ uint64_t x81 = (x79 + x65);
+{ uint64_t x82 = (x81 >> 0x18);
+{ uint32_t x83 = ((uint32_t)x81 & 0xffffff);
+{ uint64_t x84 = (x82 + x64);
+{ uint64_t x85 = (x84 >> 0x18);
+{ uint32_t x86 = ((uint32_t)x84 & 0xffffff);
+{ uint64_t x87 = (x85 + x63);
+{ uint64_t x88 = (x87 >> 0x18);
+{ uint32_t x89 = ((uint32_t)x87 & 0xffffff);
+{ uint64_t x90 = (x88 + x62);
+{ uint64_t x91 = (x90 >> 0x18);
+{ uint32_t x92 = ((uint32_t)x90 & 0xffffff);
+{ uint64_t x93 = (x91 + x61);
+{ uint64_t x94 = (x93 >> 0x18);
+{ uint32_t x95 = ((uint32_t)x93 & 0xffffff);
+{ uint64_t x96 = (x94 + x60);
+{ uint64_t x97 = (x96 >> 0x18);
+{ uint32_t x98 = ((uint32_t)x96 & 0xffffff);
+{ uint64_t x99 = (x97 + x59);
+{ uint64_t x100 = (x99 >> 0x18);
+{ uint32_t x101 = ((uint32_t)x99 & 0xffffff);
+{ uint64_t x102 = (x100 + x58);
+{ uint64_t x103 = (x102 >> 0x18);
+{ uint32_t x104 = ((uint32_t)x102 & 0xffffff);
+{ uint64_t x105 = (x103 + x57);
+{ uint64_t x106 = (x105 >> 0x18);
+{ uint32_t x107 = ((uint32_t)x105 & 0xffffff);
+{ uint64_t x108 = (x106 + x56);
+{ uint32_t x109 = (uint32_t) (x108 >> 0x18);
+{ uint32_t x110 = ((uint32_t)x108 & 0xffffff);
+{ uint64_t x111 = (x71 + ((uint64_t)0x11 * x109)); // the carry out of the x56 column (x109) is scaled by 0x11 and added back onto the limb of the x69 column (x71)
+{ uint32_t x112 = (uint32_t) (x111 >> 0x18);
+{ uint32_t x113 = ((uint32_t)x111 & 0xffffff);
+{ uint32_t x114 = (x112 + x74);
+{ uint32_t x115 = (x114 >> 0x18);
+{ uint32_t x116 = (x114 & 0xffffff);
+out[0] = x110;
+out[1] = x107;
+out[2] = x104;
+out[3] = x101;
+out[4] = x98;
+out[5] = x95;
+out[6] = x92;
+out[7] = x89;
+out[8] = x86;
+out[9] = x83;
+out[10] = x80;
+out[11] = x115 + x77; // last carry x115 is added to the masked limb x77 without re-masking
+out[12] = x116;
+out[13] = x113;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[14];
diff --git a/src/Specific/solinas32_2e336m17/femul.h b/src/Specific/solinas32_2e336m17/femul.h
new file mode 100644
index 000000000..6fdc2d29d
--- /dev/null
+++ b/src/Specific/solinas32_2e336m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for femul: the 28 uint64_t limb arguments are passed by value and the result is written through out (femul.c notes the caller provides uint64_t out[14]).
+void force_inline femul(uint64_t* out, uint64_t x28, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x54, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31);
diff --git a/src/Specific/solinas32_2e336m17/fesquare.c b/src/Specific/solinas32_2e336m17/fesquare.c
new file mode 100644
index 000000000..5add565ee
--- /dev/null
+++ b/src/Specific/solinas32_2e336m17/fesquare.c
@@ -0,0 +1,96 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: computes 14 limbs from the 14 by-value limb arguments (each column multiplies the argument list with itself) and stores them in out[0..13]; all products are formed in uint64_t. NOTE(review): the directory name suggests squaring modulo 2^336 - 17 with 32-bit-sized inputs -- confirm against the generator.
+void force_inline fesquare(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x27 = (((uint64_t)x2 * x25) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x25 * x2)))))))))))))); // x27..x40: 14 column sums of limb products; x28..x40 also contain a group of products scaled by 0x11 (17)
+{ uint64_t x28 = ((((uint64_t)x2 * x26) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x26 * x2))))))))))))) + (0x11 * ((uint64_t)x25 * x25)));
+{ uint64_t x29 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0x11 * (((uint64_t)x26 * x25) + ((uint64_t)x25 * x26))));
+{ uint64_t x30 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x11 * (((uint64_t)x24 * x25) + (((uint64_t)x26 * x26) + ((uint64_t)x25 * x24)))));
+{ uint64_t x31 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x11 * (((uint64_t)x22 * x25) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((uint64_t)x25 * x22))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x11 * (((uint64_t)x20 * x25) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((uint64_t)x25 * x20)))))));
+{ uint64_t x33 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x11 * (((uint64_t)x18 * x25) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x25 * x18))))))));
+{ uint64_t x34 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x11 * (((uint64_t)x16 * x25) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + ((uint64_t)x25 * x16)))))))));
+{ uint64_t x35 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0x11 * (((uint64_t)x14 * x25) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + ((uint64_t)x25 * x14))))))))));
+{ uint64_t x36 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0x11 * (((uint64_t)x12 * x25) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + ((uint64_t)x25 * x12)))))))))));
+{ uint64_t x37 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0x11 * (((uint64_t)x10 * x25) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + ((uint64_t)x25 * x10))))))))))));
+{ uint64_t x38 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0x11 * (((uint64_t)x8 * x25) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + ((uint64_t)x25 * x8)))))))))))));
+{ uint64_t x39 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x11 * (((uint64_t)x6 * x25) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + ((uint64_t)x25 * x6))))))))))))));
+{ uint64_t x40 = (((uint64_t)x2 * x2) + (0x11 * (((uint64_t)x4 * x25) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + ((uint64_t)x25 * x4)))))))))))))));
+{ uint64_t x41 = (x40 >> 0x18); // carry chain from x40 up to x27: >> 0x18 extracts the carry above 24 bits, & 0xffffff keeps the 24-bit limb
+{ uint32_t x42 = ((uint32_t)x40 & 0xffffff);
+{ uint64_t x43 = (x41 + x39);
+{ uint64_t x44 = (x43 >> 0x18);
+{ uint32_t x45 = ((uint32_t)x43 & 0xffffff);
+{ uint64_t x46 = (x44 + x38);
+{ uint64_t x47 = (x46 >> 0x18);
+{ uint32_t x48 = ((uint32_t)x46 & 0xffffff);
+{ uint64_t x49 = (x47 + x37);
+{ uint64_t x50 = (x49 >> 0x18);
+{ uint32_t x51 = ((uint32_t)x49 & 0xffffff);
+{ uint64_t x52 = (x50 + x36);
+{ uint64_t x53 = (x52 >> 0x18);
+{ uint32_t x54 = ((uint32_t)x52 & 0xffffff);
+{ uint64_t x55 = (x53 + x35);
+{ uint64_t x56 = (x55 >> 0x18);
+{ uint32_t x57 = ((uint32_t)x55 & 0xffffff);
+{ uint64_t x58 = (x56 + x34);
+{ uint64_t x59 = (x58 >> 0x18);
+{ uint32_t x60 = ((uint32_t)x58 & 0xffffff);
+{ uint64_t x61 = (x59 + x33);
+{ uint64_t x62 = (x61 >> 0x18);
+{ uint32_t x63 = ((uint32_t)x61 & 0xffffff);
+{ uint64_t x64 = (x62 + x32);
+{ uint64_t x65 = (x64 >> 0x18);
+{ uint32_t x66 = ((uint32_t)x64 & 0xffffff);
+{ uint64_t x67 = (x65 + x31);
+{ uint64_t x68 = (x67 >> 0x18);
+{ uint32_t x69 = ((uint32_t)x67 & 0xffffff);
+{ uint64_t x70 = (x68 + x30);
+{ uint64_t x71 = (x70 >> 0x18);
+{ uint32_t x72 = ((uint32_t)x70 & 0xffffff);
+{ uint64_t x73 = (x71 + x29);
+{ uint64_t x74 = (x73 >> 0x18);
+{ uint32_t x75 = ((uint32_t)x73 & 0xffffff);
+{ uint64_t x76 = (x74 + x28);
+{ uint64_t x77 = (x76 >> 0x18);
+{ uint32_t x78 = ((uint32_t)x76 & 0xffffff);
+{ uint64_t x79 = (x77 + x27);
+{ uint32_t x80 = (uint32_t) (x79 >> 0x18);
+{ uint32_t x81 = ((uint32_t)x79 & 0xffffff);
+{ uint64_t x82 = (x42 + ((uint64_t)0x11 * x80)); // the carry out of the x27 column (x80) is scaled by 0x11 and added back onto the limb of the x40 column (x42)
+{ uint32_t x83 = (uint32_t) (x82 >> 0x18);
+{ uint32_t x84 = ((uint32_t)x82 & 0xffffff);
+{ uint32_t x85 = (x83 + x45);
+{ uint32_t x86 = (x85 >> 0x18);
+{ uint32_t x87 = (x85 & 0xffffff);
+out[0] = x81;
+out[1] = x78;
+out[2] = x75;
+out[3] = x72;
+out[4] = x69;
+out[5] = x66;
+out[6] = x63;
+out[7] = x60;
+out[8] = x57;
+out[9] = x54;
+out[10] = x51;
+out[11] = x86 + x48; // last carry x86 is added to the masked limb x48 without re-masking
+out[12] = x87;
+out[13] = x84;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[14];
diff --git a/src/Specific/solinas32_2e336m17/fesquare.h b/src/Specific/solinas32_2e336m17/fesquare.h
new file mode 100644
index 000000000..5c03beeab
--- /dev/null
+++ b/src/Specific/solinas32_2e336m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for fesquare: the 14 uint64_t limb arguments are passed by value and the result is written through out (fesquare.c notes the caller provides uint64_t out[14]).
+void force_inline fesquare(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e336m17/freeze.c b/src/Specific/solinas32_2e336m17/freeze.c
new file mode 100644
index 000000000..c6b0c0494
--- /dev/null
+++ b/src/Specific/solinas32_2e336m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): the body of freeze below is not valid C -- there is no opening brace after the signature, declarations appear in expression position, and a raw "Op Syntax.SubWithGetBorrow ..." term is left in. It looks like incomplete generator output and presumably needs to be regenerated; confirm before building this file.
+void force_inline freeze(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x28;
+out[1] = uint8_t x29 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffef;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e336m17/freeze.h b/src/Specific/solinas32_2e336m17/freeze.h
new file mode 100644
index 000000000..0fda81e7a
--- /dev/null
+++ b/src/Specific/solinas32_2e336m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// Prototype for freeze: 14 uint64_t limb arguments by value, result written through out. NOTE(review): the freeze.c added next to this header does not contain a valid C definition of this function.
+void force_inline freeze(uint64_t* out, uint64_t x25, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e336m3/femul.c b/src/Specific/solinas32_2e336m3/femul.c
new file mode 100644
index 000000000..e1eb4f061
--- /dev/null
+++ b/src/Specific/solinas32_2e336m3/femul.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27) // takes two 12-limb operands by value (x24, x25, x23 ... x5 and x46, x47, x45 ... x27) and stores 12 result limbs in out
+{ uint64_t x48 = (((uint64_t)x5 * x46) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + (((uint64_t)x25 * x29) + ((uint64_t)x24 * x27)))))))))))); // x48..x59 are sums of limb products; x48 has no scaled group, every later sum adds a second group of products scaled by 0x3
+{ uint64_t x49 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + (((uint64_t)x23 * x29) + ((uint64_t)x25 * x27))))))))))) + (0x3 * ((uint64_t)x24 * x46)));
+{ uint64_t x50 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + (((uint64_t)x21 * x29) + ((uint64_t)x23 * x27)))))))))) + (0x3 * (((uint64_t)x25 * x46) + ((uint64_t)x24 * x47))));
+{ uint64_t x51 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + (((uint64_t)x19 * x29) + ((uint64_t)x21 * x27))))))))) + (0x3 * (((uint64_t)x23 * x46) + (((uint64_t)x25 * x47) + ((uint64_t)x24 * x45)))));
+{ uint64_t x52 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + (((uint64_t)x17 * x29) + ((uint64_t)x19 * x27)))))))) + (0x3 * (((uint64_t)x21 * x46) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + ((uint64_t)x24 * x43))))));
+{ uint64_t x53 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + (((uint64_t)x15 * x29) + ((uint64_t)x17 * x27))))))) + (0x3 * (((uint64_t)x19 * x46) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + ((uint64_t)x24 * x41)))))));
+{ ℤ x54 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + (((uint64_t)x13 * x29) + ((uint64_t)x15 * x27)))))) +ℤ (0x3 * (((uint64_t)x17 * x46) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + ((uint64_t)x24 * x39)))))))); // NOTE(review): from here through x59 (and x62, x65, ... x74 below) the type "ℤ" and operators "+ℤ" / "*ℤ" are not C, so this file will not compile as-is; presumably the generator assigned no fixed-width type here (confirm)
+{ ℤ x55 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + (((uint64_t)x11 * x29) + ((uint64_t)x13 * x27))))) +ℤ (0x3 * (((uint64_t)x15 * x46) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + ((uint64_t)x24 * x37)))))))));
+{ ℤ x56 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + (((uint64_t)x9 * x29) + ((uint64_t)x11 * x27)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x46) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + ((uint64_t)x24 * x35))))))))));
+{ ℤ x57 = ((((uint64_t)x5 * x31) + (((uint64_t)x7 * x29) + ((uint64_t)x9 * x27))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x46) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + ((uint64_t)x24 * x33)))))))))));
+{ ℤ x58 = ((((uint64_t)x5 * x29) + ((uint64_t)x7 * x27)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x46) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + ((uint64_t)x24 * x31))))))))))));
+{ ℤ x59 = (((uint64_t)x5 * x27) +ℤ (0x3 *ℤ (((uint64_t)x7 * x46) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + (((uint64_t)x25 * x31) + ((uint64_t)x24 * x29)))))))))))));
+{ uint64_t x60 = (x59 >> 0x1c); // carry chain, running from x59 to x48: ">> 0x1c" is the carry into the next sum, "& 0xfffffff" keeps the low 28 bits as the limb
+{ uint32_t x61 = (x59 & 0xfffffff);
+{ ℤ x62 = (x60 +ℤ x58);
+{ uint64_t x63 = (x62 >> 0x1c);
+{ uint32_t x64 = (x62 & 0xfffffff);
+{ ℤ x65 = (x63 +ℤ x57);
+{ uint64_t x66 = (x65 >> 0x1c);
+{ uint32_t x67 = (x65 & 0xfffffff);
+{ ℤ x68 = (x66 +ℤ x56);
+{ uint64_t x69 = (x68 >> 0x1c);
+{ uint32_t x70 = (x68 & 0xfffffff);
+{ ℤ x71 = (x69 +ℤ x55);
+{ uint64_t x72 = (x71 >> 0x1c);
+{ uint32_t x73 = (x71 & 0xfffffff);
+{ ℤ x74 = (x72 +ℤ x54);
+{ uint64_t x75 = (x74 >> 0x1c);
+{ uint32_t x76 = (x74 & 0xfffffff);
+{ uint64_t x77 = (x75 + x53);
+{ uint64_t x78 = (x77 >> 0x1c);
+{ uint32_t x79 = ((uint32_t)x77 & 0xfffffff);
+{ uint64_t x80 = (x78 + x52);
+{ uint64_t x81 = (x80 >> 0x1c);
+{ uint32_t x82 = ((uint32_t)x80 & 0xfffffff);
+{ uint64_t x83 = (x81 + x51);
+{ uint64_t x84 = (x83 >> 0x1c);
+{ uint32_t x85 = ((uint32_t)x83 & 0xfffffff);
+{ uint64_t x86 = (x84 + x50);
+{ uint64_t x87 = (x86 >> 0x1c);
+{ uint32_t x88 = ((uint32_t)x86 & 0xfffffff);
+{ uint64_t x89 = (x87 + x49);
+{ uint64_t x90 = (x89 >> 0x1c);
+{ uint32_t x91 = ((uint32_t)x89 & 0xfffffff);
+{ uint64_t x92 = (x90 + x48);
+{ uint64_t x93 = (x92 >> 0x1c);
+{ uint32_t x94 = ((uint32_t)x92 & 0xfffffff);
+{ uint64_t x95 = (x61 + (0x3 * x93)); // the carry out of the last sum (x93) is scaled by 0x3 and added back onto the first limb x61
+{ uint32_t x96 = (uint32_t) (x95 >> 0x1c);
+{ uint32_t x97 = ((uint32_t)x95 & 0xfffffff);
+{ uint32_t x98 = (x96 + x64); // the carry from that addition moves into the second limb x64
+{ uint32_t x99 = (x98 >> 0x1c);
+{ uint32_t x100 = (x98 & 0xfffffff);
+out[0] = x94; // limbs are stored in reverse chain order: out[0] is the last limb of the chain (x94), out[11] the first (x97)
+out[1] = x91;
+out[2] = x88;
+out[3] = x85;
+out[4] = x82;
+out[5] = x79;
+out[6] = x76;
+out[7] = x73;
+out[8] = x70;
+out[9] = x99 + x67; // x99 (carry out of x98) is added to x67; this sum is stored without a further mask
+out[10] = x100;
+out[11] = x97;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e336m3/femul.h b/src/Specific/solinas32_2e336m3/femul.h
new file mode 100644
index 000000000..21cfc1a1b
--- /dev/null
+++ b/src/Specific/solinas32_2e336m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x24, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x46, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27); // prototype only; the definition with this signature is in femul.c of the same directory
diff --git a/src/Specific/solinas32_2e336m3/fesquare.c b/src/Specific/solinas32_2e336m3/fesquare.c
new file mode 100644
index 000000000..12ee7761a
--- /dev/null
+++ b/src/Specific/solinas32_2e336m3/fesquare.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // takes one 12-limb operand by value (x21, x22, x20 ... x2) and stores 12 result limbs in out
+{ uint64_t x23 = (((uint64_t)x2 * x21) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x21 * x2)))))))))))); // x23..x34 are sums of limb products of the operand with itself; each cross product is written out in both orders (e.g. x2*x21 and x21*x2) rather than doubled
+{ uint64_t x24 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0x3 * ((uint64_t)x21 * x21))); // from here on every sum adds a second group of products scaled by 0x3
+{ uint64_t x25 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0x3 * (((uint64_t)x22 * x21) + ((uint64_t)x21 * x22))));
+{ uint64_t x26 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0x3 * (((uint64_t)x20 * x21) + (((uint64_t)x22 * x22) + ((uint64_t)x21 * x20)))));
+{ uint64_t x27 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0x3 * (((uint64_t)x18 * x21) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x21 * x18))))));
+{ uint64_t x28 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0x3 * (((uint64_t)x16 * x21) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + ((uint64_t)x21 * x16)))))));
+{ ℤ x29 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ (0x3 * (((uint64_t)x14 * x21) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + ((uint64_t)x21 * x14)))))))); // NOTE(review): from here through x34 (and x37, x40, ... x49 below) the type "ℤ" and operators "+ℤ" / "*ℤ" are not C, so this file will not compile as-is; presumably the generator assigned no fixed-width type here (confirm)
+{ ℤ x30 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) +ℤ (0x3 * (((uint64_t)x12 * x21) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + ((uint64_t)x21 * x12)))))))));
+{ ℤ x31 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x21) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + ((uint64_t)x21 * x10))))))))));
+{ ℤ x32 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x21) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + ((uint64_t)x21 * x8)))))))))));
+{ ℤ x33 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x21) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + ((uint64_t)x21 * x6))))))))))));
+{ ℤ x34 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ (((uint64_t)x4 * x21) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + ((uint64_t)x21 * x4)))))))))))));
+{ uint64_t x35 = (x34 >> 0x1c); // carry chain, running from x34 to x23: ">> 0x1c" is the carry into the next sum, "& 0xfffffff" keeps the low 28 bits as the limb
+{ uint32_t x36 = (x34 & 0xfffffff);
+{ ℤ x37 = (x35 +ℤ x33);
+{ uint64_t x38 = (x37 >> 0x1c);
+{ uint32_t x39 = (x37 & 0xfffffff);
+{ ℤ x40 = (x38 +ℤ x32);
+{ uint64_t x41 = (x40 >> 0x1c);
+{ uint32_t x42 = (x40 & 0xfffffff);
+{ ℤ x43 = (x41 +ℤ x31);
+{ uint64_t x44 = (x43 >> 0x1c);
+{ uint32_t x45 = (x43 & 0xfffffff);
+{ ℤ x46 = (x44 +ℤ x30);
+{ uint64_t x47 = (x46 >> 0x1c);
+{ uint32_t x48 = (x46 & 0xfffffff);
+{ ℤ x49 = (x47 +ℤ x29);
+{ uint64_t x50 = (x49 >> 0x1c);
+{ uint32_t x51 = (x49 & 0xfffffff);
+{ uint64_t x52 = (x50 + x28);
+{ uint64_t x53 = (x52 >> 0x1c);
+{ uint32_t x54 = ((uint32_t)x52 & 0xfffffff);
+{ uint64_t x55 = (x53 + x27);
+{ uint64_t x56 = (x55 >> 0x1c);
+{ uint32_t x57 = ((uint32_t)x55 & 0xfffffff);
+{ uint64_t x58 = (x56 + x26);
+{ uint64_t x59 = (x58 >> 0x1c);
+{ uint32_t x60 = ((uint32_t)x58 & 0xfffffff);
+{ uint64_t x61 = (x59 + x25);
+{ uint64_t x62 = (x61 >> 0x1c);
+{ uint32_t x63 = ((uint32_t)x61 & 0xfffffff);
+{ uint64_t x64 = (x62 + x24);
+{ uint64_t x65 = (x64 >> 0x1c);
+{ uint32_t x66 = ((uint32_t)x64 & 0xfffffff);
+{ uint64_t x67 = (x65 + x23);
+{ uint64_t x68 = (x67 >> 0x1c);
+{ uint32_t x69 = ((uint32_t)x67 & 0xfffffff);
+{ uint64_t x70 = (x36 + (0x3 * x68)); // the carry out of the last sum (x68) is scaled by 0x3 and added back onto the first limb x36
+{ uint32_t x71 = (uint32_t) (x70 >> 0x1c);
+{ uint32_t x72 = ((uint32_t)x70 & 0xfffffff);
+{ uint32_t x73 = (x71 + x39); // the carry from that addition moves into the second limb x39
+{ uint32_t x74 = (x73 >> 0x1c);
+{ uint32_t x75 = (x73 & 0xfffffff);
+out[0] = x69; // limbs are stored in reverse chain order: out[0] is the last limb of the chain (x69), out[11] the first (x72)
+out[1] = x66;
+out[2] = x63;
+out[3] = x60;
+out[4] = x57;
+out[5] = x54;
+out[6] = x51;
+out[7] = x48;
+out[8] = x45;
+out[9] = x74 + x42; // x74 (carry out of x73) is added to x42; this sum is stored without a further mask
+out[10] = x75;
+out[11] = x72;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[12];
diff --git a/src/Specific/solinas32_2e336m3/fesquare.h b/src/Specific/solinas32_2e336m3/fesquare.h
new file mode 100644
index 000000000..5d10fa419
--- /dev/null
+++ b/src/Specific/solinas32_2e336m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only; the definition with this signature is in fesquare.c of the same directory
diff --git a/src/Specific/solinas32_2e336m3/freeze.c b/src/Specific/solinas32_2e336m3/freeze.c
new file mode 100644
index 000000000..f90fdb50f
--- /dev/null
+++ b/src/Specific/solinas32_2e336m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // takes 12 limbs by value plus an output pointer; NOTE(review): no opening '{' follows, so the closing '}' below is unmatched
+out[0] = uint32_t x24; // NOTE(review): not valid C - a declaration appears where an expression is required
+out[1] = uint8_t x25 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): raw "Op Syntax.SubWithGetBorrow ..." term, not C; presumably generator output that was never lowered to C (confirm)
+out[2] = x2; // stores the last parameter unchanged
+out[3] = 0xffffffd;; // 0xffffffd == 2^28 - 3; note the doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e336m3/freeze.h b/src/Specific/solinas32_2e336m3/freeze.h
new file mode 100644
index 000000000..e3e185ec8
--- /dev/null
+++ b/src/Specific/solinas32_2e336m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x21, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only; NOTE(review): the freeze.c added next to this header in the same commit does not contain a valid C body
diff --git a/src/Specific/solinas32_2e338m15/femul.c b/src/Specific/solinas32_2e338m15/femul.c
new file mode 100644
index 000000000..f89890ffb
--- /dev/null
+++ b/src/Specific/solinas32_2e338m15/femul.c
@@ -0,0 +1,91 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29) // takes two 13-limb operands by value (x26, x27, x25 ... x5 and x50, x51, x49 ... x29) and stores 13 result limbs in out
+{ uint64_t x52 = (((uint64_t)x5 * x50) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + (((uint64_t)x23 * x35) + (((uint64_t)x25 * x33) + (((uint64_t)x27 * x31) + ((uint64_t)x26 * x29))))))))))))); // x52..x64 are sums of limb products; x52 has no scaled group, every later sum adds a second group of products scaled by 0xf
+{ uint64_t x53 = ((((uint64_t)x5 * x51) + (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + (((uint64_t)x19 * x37) + (((uint64_t)x21 * x35) + (((uint64_t)x23 * x33) + (((uint64_t)x25 * x31) + ((uint64_t)x27 * x29)))))))))))) + (0xf * ((uint64_t)x26 * x50)));
+{ uint64_t x54 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + (((uint64_t)x19 * x35) + (((uint64_t)x21 * x33) + (((uint64_t)x23 * x31) + ((uint64_t)x25 * x29))))))))))) + (0xf * (((uint64_t)x27 * x50) + ((uint64_t)x26 * x51))));
+{ uint64_t x55 = ((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + (((uint64_t)x17 * x35) + (((uint64_t)x19 * x33) + (((uint64_t)x21 * x31) + ((uint64_t)x23 * x29)))))))))) + (0xf * (((uint64_t)x25 * x50) + (((uint64_t)x27 * x51) + ((uint64_t)x26 * x49)))));
+{ uint64_t x56 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + (((uint64_t)x15 * x35) + (((uint64_t)x17 * x33) + (((uint64_t)x19 * x31) + ((uint64_t)x21 * x29))))))))) + (0xf * (((uint64_t)x23 * x50) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + ((uint64_t)x26 * x47))))));
+{ uint64_t x57 = ((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + (((uint64_t)x13 * x35) + (((uint64_t)x15 * x33) + (((uint64_t)x17 * x31) + ((uint64_t)x19 * x29)))))))) + (0xf * (((uint64_t)x21 * x50) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + ((uint64_t)x26 * x45)))))));
+{ uint64_t x58 = ((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + (((uint64_t)x11 * x35) + (((uint64_t)x13 * x33) + (((uint64_t)x15 * x31) + ((uint64_t)x17 * x29))))))) + (0xf * (((uint64_t)x19 * x50) + (((uint64_t)x21 * x51) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + ((uint64_t)x26 * x43))))))));
+{ uint64_t x59 = ((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + (((uint64_t)x9 * x35) + (((uint64_t)x11 * x33) + (((uint64_t)x13 * x31) + ((uint64_t)x15 * x29)))))) + (0xf * (((uint64_t)x17 * x50) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + ((uint64_t)x26 * x41)))))))));
+{ uint64_t x60 = ((((uint64_t)x5 * x37) + (((uint64_t)x7 * x35) + (((uint64_t)x9 * x33) + (((uint64_t)x11 * x31) + ((uint64_t)x13 * x29))))) + (0xf * (((uint64_t)x15 * x50) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + ((uint64_t)x26 * x39))))))))));
+{ uint64_t x61 = ((((uint64_t)x5 * x35) + (((uint64_t)x7 * x33) + (((uint64_t)x9 * x31) + ((uint64_t)x11 * x29)))) + (0xf * (((uint64_t)x13 * x50) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + ((uint64_t)x26 * x37)))))))))));
+{ uint64_t x62 = ((((uint64_t)x5 * x33) + (((uint64_t)x7 * x31) + ((uint64_t)x9 * x29))) + (0xf * (((uint64_t)x11 * x50) + (((uint64_t)x13 * x51) + (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + (((uint64_t)x19 * x45) + (((uint64_t)x21 * x43) + (((uint64_t)x23 * x41) + (((uint64_t)x25 * x39) + (((uint64_t)x27 * x37) + ((uint64_t)x26 * x35))))))))))));
+{ uint64_t x63 = ((((uint64_t)x5 * x31) + ((uint64_t)x7 * x29)) + (0xf * (((uint64_t)x9 * x50) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + (((uint64_t)x23 * x39) + (((uint64_t)x25 * x37) + (((uint64_t)x27 * x35) + ((uint64_t)x26 * x33)))))))))))));
+{ uint64_t x64 = (((uint64_t)x5 * x29) + (0xf * (((uint64_t)x7 * x50) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + (((uint64_t)x23 * x37) + (((uint64_t)x25 * x35) + (((uint64_t)x27 * x33) + ((uint64_t)x26 * x31))))))))))))));
+{ uint64_t x65 = (x64 >> 0x1a); // carry chain, running from x64 to x52: ">> 0x1a" is the carry into the next sum, "& 0x3ffffff" keeps the low 26 bits as the limb
+{ uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
+{ uint64_t x67 = (x65 + x63);
+{ uint64_t x68 = (x67 >> 0x1a);
+{ uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
+{ uint64_t x70 = (x68 + x62);
+{ uint64_t x71 = (x70 >> 0x1a);
+{ uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
+{ uint64_t x73 = (x71 + x61);
+{ uint64_t x74 = (x73 >> 0x1a);
+{ uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
+{ uint64_t x76 = (x74 + x60);
+{ uint64_t x77 = (x76 >> 0x1a);
+{ uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
+{ uint64_t x79 = (x77 + x59);
+{ uint64_t x80 = (x79 >> 0x1a);
+{ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+{ uint64_t x82 = (x80 + x58);
+{ uint64_t x83 = (x82 >> 0x1a);
+{ uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
+{ uint64_t x85 = (x83 + x57);
+{ uint64_t x86 = (x85 >> 0x1a);
+{ uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
+{ uint64_t x88 = (x86 + x56);
+{ uint64_t x89 = (x88 >> 0x1a);
+{ uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
+{ uint64_t x91 = (x89 + x55);
+{ uint64_t x92 = (x91 >> 0x1a);
+{ uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
+{ uint64_t x94 = (x92 + x54);
+{ uint64_t x95 = (x94 >> 0x1a);
+{ uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
+{ uint64_t x97 = (x95 + x53);
+{ uint64_t x98 = (x97 >> 0x1a);
+{ uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
+{ uint64_t x100 = (x98 + x52);
+{ uint64_t x101 = (x100 >> 0x1a);
+{ uint32_t x102 = ((uint32_t)x100 & 0x3ffffff);
+{ uint64_t x103 = (x66 + (0xf * x101)); // the carry out of the last sum (x101) is scaled by 0xf and added back onto the first limb x66
+{ uint32_t x104 = (uint32_t) (x103 >> 0x1a);
+{ uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
+{ uint32_t x106 = (x104 + x69); // the carry from that addition moves into the second limb x69
+{ uint32_t x107 = (x106 >> 0x1a);
+{ uint32_t x108 = (x106 & 0x3ffffff);
+out[0] = x102; // limbs are stored in reverse chain order: out[0] is the last limb of the chain (x102), out[12] the first (x105)
+out[1] = x99;
+out[2] = x96;
+out[3] = x93;
+out[4] = x90;
+out[5] = x87;
+out[6] = x84;
+out[7] = x81;
+out[8] = x78;
+out[9] = x75;
+out[10] = x107 + x72; // x107 (carry out of x106) is added to x72; this sum is stored without a further mask
+out[11] = x108;
+out[12] = x105;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[13];
diff --git a/src/Specific/solinas32_2e338m15/femul.h b/src/Specific/solinas32_2e338m15/femul.h
new file mode 100644
index 000000000..bb28fc9e2
--- /dev/null
+++ b/src/Specific/solinas32_2e338m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x50, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29); // prototype only; the definition with this signature is in femul.c of the same directory
diff --git a/src/Specific/solinas32_2e338m15/fesquare.c b/src/Specific/solinas32_2e338m15/fesquare.c
new file mode 100644
index 000000000..d5ee7aa1d
--- /dev/null
+++ b/src/Specific/solinas32_2e338m15/fesquare.c
@@ -0,0 +1,91 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // takes one 13-limb operand by value (x23, x24, x22 ... x2) and stores 13 result limbs in out
+{ uint64_t x25 = (((uint64_t)x2 * x23) + (((uint64_t)x4 * x24) + (((uint64_t)x6 * x22) + (((uint64_t)x8 * x20) + (((uint64_t)x10 * x18) + (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + (((uint64_t)x16 * x12) + (((uint64_t)x18 * x10) + (((uint64_t)x20 * x8) + (((uint64_t)x22 * x6) + (((uint64_t)x24 * x4) + ((uint64_t)x23 * x2))))))))))))); // x25..x37 are sums of limb products of the operand with itself; each cross product is written out in both orders (e.g. x2*x23 and x23*x2) rather than doubled
+{ uint64_t x26 = ((((uint64_t)x2 * x24) + (((uint64_t)x4 * x22) + (((uint64_t)x6 * x20) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + (((uint64_t)x20 * x6) + (((uint64_t)x22 * x4) + ((uint64_t)x24 * x2)))))))))))) + (0xf * ((uint64_t)x23 * x23))); // from here on every sum adds a second group of products scaled by 0xf
+{ uint64_t x27 = ((((uint64_t)x2 * x22) + (((uint64_t)x4 * x20) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + (((uint64_t)x20 * x4) + ((uint64_t)x22 * x2))))))))))) + (0xf * (((uint64_t)x24 * x23) + ((uint64_t)x23 * x24))));
+{ uint64_t x28 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) + (0xf * (((uint64_t)x22 * x23) + (((uint64_t)x24 * x24) + ((uint64_t)x23 * x22)))));
+{ uint64_t x29 = ((((uint64_t)x2 * x18) + (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + (((uint64_t)x16 * x4) + ((uint64_t)x18 * x2))))))))) + (0xf * (((uint64_t)x20 * x23) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + ((uint64_t)x23 * x20))))));
+{ uint64_t x30 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (0xf * (((uint64_t)x18 * x23) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x23 * x18)))))));
+{ uint64_t x31 = ((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (0xf * (((uint64_t)x16 * x23) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + ((uint64_t)x23 * x16))))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (0xf * (((uint64_t)x14 * x23) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + ((uint64_t)x23 * x14)))))))));
+{ uint64_t x33 = ((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (0xf * (((uint64_t)x12 * x23) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + ((uint64_t)x23 * x12))))))))));
+{ uint64_t x34 = ((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (0xf * (((uint64_t)x10 * x23) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + (((uint64_t)x16 * x20) + (((uint64_t)x18 * x18) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + ((uint64_t)x23 * x10)))))))))));
+{ uint64_t x35 = ((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (0xf * (((uint64_t)x8 * x23) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + ((uint64_t)x23 * x8))))))))))));
+{ uint64_t x36 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0xf * (((uint64_t)x6 * x23) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + ((uint64_t)x23 * x6)))))))))))));
+{ uint64_t x37 = (((uint64_t)x2 * x2) + (0xf * (((uint64_t)x4 * x23) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + ((uint64_t)x23 * x4))))))))))))));
+{ uint64_t x38 = (x37 >> 0x1a); // carry chain, running from x37 to x25: ">> 0x1a" is the carry into the next sum, "& 0x3ffffff" keeps the low 26 bits as the limb
+{ uint32_t x39 = ((uint32_t)x37 & 0x3ffffff);
+{ uint64_t x40 = (x38 + x36);
+{ uint64_t x41 = (x40 >> 0x1a);
+{ uint32_t x42 = ((uint32_t)x40 & 0x3ffffff);
+{ uint64_t x43 = (x41 + x35);
+{ uint64_t x44 = (x43 >> 0x1a);
+{ uint32_t x45 = ((uint32_t)x43 & 0x3ffffff);
+{ uint64_t x46 = (x44 + x34);
+{ uint64_t x47 = (x46 >> 0x1a);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+{ uint64_t x49 = (x47 + x33);
+{ uint64_t x50 = (x49 >> 0x1a);
+{ uint32_t x51 = ((uint32_t)x49 & 0x3ffffff);
+{ uint64_t x52 = (x50 + x32);
+{ uint64_t x53 = (x52 >> 0x1a);
+{ uint32_t x54 = ((uint32_t)x52 & 0x3ffffff);
+{ uint64_t x55 = (x53 + x31);
+{ uint64_t x56 = (x55 >> 0x1a);
+{ uint32_t x57 = ((uint32_t)x55 & 0x3ffffff);
+{ uint64_t x58 = (x56 + x30);
+{ uint64_t x59 = (x58 >> 0x1a);
+{ uint32_t x60 = ((uint32_t)x58 & 0x3ffffff);
+{ uint64_t x61 = (x59 + x29);
+{ uint64_t x62 = (x61 >> 0x1a);
+{ uint32_t x63 = ((uint32_t)x61 & 0x3ffffff);
+{ uint64_t x64 = (x62 + x28);
+{ uint64_t x65 = (x64 >> 0x1a);
+{ uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
+{ uint64_t x67 = (x65 + x27);
+{ uint64_t x68 = (x67 >> 0x1a);
+{ uint32_t x69 = ((uint32_t)x67 & 0x3ffffff);
+{ uint64_t x70 = (x68 + x26);
+{ uint64_t x71 = (x70 >> 0x1a);
+{ uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
+{ uint64_t x73 = (x71 + x25);
+{ uint64_t x74 = (x73 >> 0x1a);
+{ uint32_t x75 = ((uint32_t)x73 & 0x3ffffff);
+{ uint64_t x76 = (x39 + (0xf * x74)); // the carry out of the last sum (x74) is scaled by 0xf and added back onto the first limb x39
+{ uint32_t x77 = (uint32_t) (x76 >> 0x1a);
+{ uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
+{ uint32_t x79 = (x77 + x42); // the carry from that addition moves into the second limb x42
+{ uint32_t x80 = (x79 >> 0x1a);
+{ uint32_t x81 = (x79 & 0x3ffffff);
+out[0] = x75; // limbs are stored in reverse chain order: out[0] is the last limb of the chain (x75), out[12] the first (x78)
+out[1] = x72;
+out[2] = x69;
+out[3] = x66;
+out[4] = x63;
+out[5] = x60;
+out[6] = x57;
+out[7] = x54;
+out[8] = x51;
+out[9] = x48;
+out[10] = x80 + x45; // x80 (carry out of x79) is added to x45; this sum is stored without a further mask
+out[11] = x81;
+out[12] = x78;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[13];
diff --git a/src/Specific/solinas32_2e338m15/fesquare.h b/src/Specific/solinas32_2e338m15/fesquare.h
new file mode 100644
index 000000000..6d7db17a2
--- /dev/null
+++ b/src/Specific/solinas32_2e338m15/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by 13 uint64_t values (presumably the limbs of one field element; confirm against fesquare.c).
diff --git a/src/Specific/solinas32_2e338m15/freeze.c b/src/Specific/solinas32_2e338m15/freeze.c
new file mode 100644
index 000000000..80856537d
--- /dev/null
+++ b/src/Specific/solinas32_2e338m15/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not compilable C; there is no opening brace before the first statement.
+out[0] = uint32_t x26; // NOTE(review): a declaration appears where a value is expected; x26 is never given a value.
+out[1] = uint8_t x27 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." looks like an unprinted code-generator term, not C; presumably this file must be regenerated rather than hand-edited.
+out[2] = x2; // stores the last parameter unchanged
+out[3] = 0x3fffff1;; // stores a constant; the doubled ';' is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e338m15/freeze.h b/src/Specific/solinas32_2e338m15/freeze.h
new file mode 100644
index 000000000..111fdfcc7
--- /dev/null
+++ b/src/Specific/solinas32_2e338m15/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x23, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only: an output pointer followed by 13 uint64_t values (presumably the limbs of one field element; confirm against freeze.c).
diff --git a/src/Specific/solinas32_2e369m25/femul.c b/src/Specific/solinas32_2e369m25/femul.c
new file mode 100644
index 000000000..9d097727b
--- /dev/null
+++ b/src/Specific/solinas32_2e369m25/femul.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // Multiplies the 16-value operand x32..x5 by the 16-value operand x62..x35 column by column, carries the 16 columns, and stores 16 results in out[0..15]. Wrapped-around products are scaled by 0x19, presumably reduction modulo 2^369 - 25 as the directory name suggests (confirm).
+{ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35)))))))))))))))); // x64: the column added last in the carry chain (no 0x19-scaled terms)
+{ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x19 * ((uint64_t)x32 * x62)));
+{ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x19 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+{ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x19 * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+{ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x19 * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+{ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x19 * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+{ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x19 * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+{ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x19 * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+{ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x19 * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+{ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x19 * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+{ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x19 * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+{ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x19 * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+{ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x19 * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+{ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x19 * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+{ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x19 * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+{ uint64_t x79 = (((uint64_t)x5 * x35) + (0x19 * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37)))))))))))))))))); // x79: the column where the carry chain starts
+{ uint64_t x80 = (x79 >> 0x18); // carry out of the first column: everything above the low 24 bits
+{ uint32_t x81 = ((uint32_t)x79 & 0xffffff); // low 24 bits of the first column
+{ uint64_t x82 = (x80 + x78);
+{ uint64_t x83 = (x82 >> 0x17); // every later column is split at 23 bits instead of 24
+{ uint32_t x84 = ((uint32_t)x82 & 0x7fffff);
+{ uint64_t x85 = (x83 + x77);
+{ uint64_t x86 = (x85 >> 0x17);
+{ uint32_t x87 = ((uint32_t)x85 & 0x7fffff);
+{ uint64_t x88 = (x86 + x76);
+{ uint64_t x89 = (x88 >> 0x17);
+{ uint32_t x90 = ((uint32_t)x88 & 0x7fffff);
+{ uint64_t x91 = (x89 + x75);
+{ uint64_t x92 = (x91 >> 0x17);
+{ uint32_t x93 = ((uint32_t)x91 & 0x7fffff);
+{ uint64_t x94 = (x92 + x74);
+{ uint64_t x95 = (x94 >> 0x17);
+{ uint32_t x96 = ((uint32_t)x94 & 0x7fffff);
+{ uint64_t x97 = (x95 + x73);
+{ uint64_t x98 = (x97 >> 0x17);
+{ uint32_t x99 = ((uint32_t)x97 & 0x7fffff);
+{ uint64_t x100 = (x98 + x72);
+{ uint64_t x101 = (x100 >> 0x17);
+{ uint32_t x102 = ((uint32_t)x100 & 0x7fffff);
+{ uint64_t x103 = (x101 + x71);
+{ uint64_t x104 = (x103 >> 0x17);
+{ uint32_t x105 = ((uint32_t)x103 & 0x7fffff);
+{ uint64_t x106 = (x104 + x70);
+{ uint64_t x107 = (x106 >> 0x17);
+{ uint32_t x108 = ((uint32_t)x106 & 0x7fffff);
+{ uint64_t x109 = (x107 + x69);
+{ uint64_t x110 = (x109 >> 0x17);
+{ uint32_t x111 = ((uint32_t)x109 & 0x7fffff);
+{ uint64_t x112 = (x110 + x68);
+{ uint64_t x113 = (x112 >> 0x17);
+{ uint32_t x114 = ((uint32_t)x112 & 0x7fffff);
+{ uint64_t x115 = (x113 + x67);
+{ uint64_t x116 = (x115 >> 0x17);
+{ uint32_t x117 = ((uint32_t)x115 & 0x7fffff);
+{ uint64_t x118 = (x116 + x66);
+{ uint64_t x119 = (x118 >> 0x17);
+{ uint32_t x120 = ((uint32_t)x118 & 0x7fffff);
+{ uint64_t x121 = (x119 + x65);
+{ uint64_t x122 = (x121 >> 0x17);
+{ uint32_t x123 = ((uint32_t)x121 & 0x7fffff);
+{ uint64_t x124 = (x122 + x64);
+{ uint32_t x125 = (uint32_t) (x124 >> 0x17); // carry out of the last column, truncated to 32 bits
+{ uint32_t x126 = ((uint32_t)x124 & 0x7fffff);
+{ uint64_t x127 = (x81 + ((uint64_t)0x19 * x125)); // fold the final carry back into the first limb, scaled by 0x19
+{ uint32_t x128 = (uint32_t) (x127 >> 0x18);
+{ uint32_t x129 = ((uint32_t)x127 & 0xffffff);
+{ uint32_t x130 = (x128 + x84); // second pass: push the new carry into the next limb
+{ uint32_t x131 = (x130 >> 0x17);
+{ uint32_t x132 = (x130 & 0x7fffff);
+out[0] = x126; // out[0] comes from the last column of the chain (x64)
+out[1] = x123;
+out[2] = x120;
+out[3] = x117;
+out[4] = x114;
+out[5] = x111;
+out[6] = x108;
+out[7] = x105;
+out[8] = x102;
+out[9] = x99;
+out[10] = x96;
+out[11] = x93;
+out[12] = x90;
+out[13] = x131 + x87; // the leftover carry is added here without re-masking
+out[14] = x132;
+out[15] = x129; // out[15] comes from the first column of the chain (x79)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e369m25/femul.h b/src/Specific/solinas32_2e369m25/femul.h
new file mode 100644
index 000000000..c4089fc7d
--- /dev/null
+++ b/src/Specific/solinas32_2e369m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // Prototype only: an output pointer followed by 32 uint64_t values (presumably two 16-limb operands, x32..x5 and x62..x35; confirm against femul.c).
diff --git a/src/Specific/solinas32_2e369m25/femulDisplay.log b/src/Specific/solinas32_2e369m25/femulDisplay.log
index 0ad0e196b..de38629da 100644
--- a/src/Specific/solinas32_2e369m25/femulDisplay.log
+++ b/src/Specific/solinas32_2e369m25/femulDisplay.log
@@ -2,75 +2,75 @@
Interp-η
(λ var : Syntax.base_type → Type,
λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
- uint64_t x64 = (uint64_t) x5 * x62 + (0x2 * ((uint64_t) x7 * x63) + (0x2 * ((uint64_t) x9 * x61) + (0x2 * ((uint64_t) x11 * x59) + (0x2 * ((uint64_t) x13 * x57) + (0x2 * ((uint64_t) x15 * x55) + (0x2 * ((uint64_t) x17 * x53) + (0x2 * ((uint64_t) x19 * x51) + (0x2 * ((uint64_t) x21 * x49) + (0x2 * ((uint64_t) x23 * x47) + (0x2 * ((uint64_t) x25 * x45) + (0x2 * ((uint64_t) x27 * x43) + (0x2 * ((uint64_t) x29 * x41) + (0x2 * ((uint64_t) x31 * x39) + (0x2 * ((uint64_t) x33 * x37) + (uint64_t) x32 * x35))))))))))))));
- uint64_t x65 = (uint64_t) x5 * x63 + (0x2 * ((uint64_t) x7 * x61) + (0x2 * ((uint64_t) x9 * x59) + (0x2 * ((uint64_t) x11 * x57) + (0x2 * ((uint64_t) x13 * x55) + (0x2 * ((uint64_t) x15 * x53) + (0x2 * ((uint64_t) x17 * x51) + (0x2 * ((uint64_t) x19 * x49) + (0x2 * ((uint64_t) x21 * x47) + (0x2 * ((uint64_t) x23 * x45) + (0x2 * ((uint64_t) x25 * x43) + (0x2 * ((uint64_t) x27 * x41) + (0x2 * ((uint64_t) x29 * x39) + (0x2 * ((uint64_t) x31 * x37) + (uint64_t) x33 * x35))))))))))))) + 0x19 * ((uint64_t) x32 * x62);
- uint64_t x66 = (uint64_t) x5 * x61 + (0x2 * ((uint64_t) x7 * x59) + (0x2 * ((uint64_t) x9 * x57) + (0x2 * ((uint64_t) x11 * x55) + (0x2 * ((uint64_t) x13 * x53) + (0x2 * ((uint64_t) x15 * x51) + (0x2 * ((uint64_t) x17 * x49) + (0x2 * ((uint64_t) x19 * x47) + (0x2 * ((uint64_t) x21 * x45) + (0x2 * ((uint64_t) x23 * x43) + (0x2 * ((uint64_t) x25 * x41) + (0x2 * ((uint64_t) x27 * x39) + (0x2 * ((uint64_t) x29 * x37) + (uint64_t) x31 * x35)))))))))))) + 0x19 * ((uint64_t) x33 * x62 + (uint64_t) x32 * x63);
- uint64_t x67 = (uint64_t) x5 * x59 + (0x2 * ((uint64_t) x7 * x57) + (0x2 * ((uint64_t) x9 * x55) + (0x2 * ((uint64_t) x11 * x53) + (0x2 * ((uint64_t) x13 * x51) + (0x2 * ((uint64_t) x15 * x49) + (0x2 * ((uint64_t) x17 * x47) + (0x2 * ((uint64_t) x19 * x45) + (0x2 * ((uint64_t) x21 * x43) + (0x2 * ((uint64_t) x23 * x41) + (0x2 * ((uint64_t) x25 * x39) + (0x2 * ((uint64_t) x27 * x37) + (uint64_t) x29 * x35))))))))))) + 0x19 * ((uint64_t) x31 * x62 + ((uint64_t) x33 * x63 + (uint64_t) x32 * x61));
- uint64_t x68 = (uint64_t) x5 * x57 + (0x2 * ((uint64_t) x7 * x55) + (0x2 * ((uint64_t) x9 * x53) + (0x2 * ((uint64_t) x11 * x51) + (0x2 * ((uint64_t) x13 * x49) + (0x2 * ((uint64_t) x15 * x47) + (0x2 * ((uint64_t) x17 * x45) + (0x2 * ((uint64_t) x19 * x43) + (0x2 * ((uint64_t) x21 * x41) + (0x2 * ((uint64_t) x23 * x39) + (0x2 * ((uint64_t) x25 * x37) + (uint64_t) x27 * x35)))))))))) + 0x19 * ((uint64_t) x29 * x62 + ((uint64_t) x31 * x63 + ((uint64_t) x33 * x61 + (uint64_t) x32 * x59)));
- uint64_t x69 = (uint64_t) x5 * x55 + (0x2 * ((uint64_t) x7 * x53) + (0x2 * ((uint64_t) x9 * x51) + (0x2 * ((uint64_t) x11 * x49) + (0x2 * ((uint64_t) x13 * x47) + (0x2 * ((uint64_t) x15 * x45) + (0x2 * ((uint64_t) x17 * x43) + (0x2 * ((uint64_t) x19 * x41) + (0x2 * ((uint64_t) x21 * x39) + (0x2 * ((uint64_t) x23 * x37) + (uint64_t) x25 * x35))))))))) + 0x19 * ((uint64_t) x27 * x62 + ((uint64_t) x29 * x63 + ((uint64_t) x31 * x61 + ((uint64_t) x33 * x59 + (uint64_t) x32 * x57))));
- uint64_t x70 = (uint64_t) x5 * x53 + (0x2 * ((uint64_t) x7 * x51) + (0x2 * ((uint64_t) x9 * x49) + (0x2 * ((uint64_t) x11 * x47) + (0x2 * ((uint64_t) x13 * x45) + (0x2 * ((uint64_t) x15 * x43) + (0x2 * ((uint64_t) x17 * x41) + (0x2 * ((uint64_t) x19 * x39) + (0x2 * ((uint64_t) x21 * x37) + (uint64_t) x23 * x35)))))))) + 0x19 * ((uint64_t) x25 * x62 + ((uint64_t) x27 * x63 + ((uint64_t) x29 * x61 + ((uint64_t) x31 * x59 + ((uint64_t) x33 * x57 + (uint64_t) x32 * x55)))));
- uint64_t x71 = (uint64_t) x5 * x51 + (0x2 * ((uint64_t) x7 * x49) + (0x2 * ((uint64_t) x9 * x47) + (0x2 * ((uint64_t) x11 * x45) + (0x2 * ((uint64_t) x13 * x43) + (0x2 * ((uint64_t) x15 * x41) + (0x2 * ((uint64_t) x17 * x39) + (0x2 * ((uint64_t) x19 * x37) + (uint64_t) x21 * x35))))))) + 0x19 * ((uint64_t) x23 * x62 + ((uint64_t) x25 * x63 + ((uint64_t) x27 * x61 + ((uint64_t) x29 * x59 + ((uint64_t) x31 * x57 + ((uint64_t) x33 * x55 + (uint64_t) x32 * x53))))));
- uint64_t x72 = (uint64_t) x5 * x49 + (0x2 * ((uint64_t) x7 * x47) + (0x2 * ((uint64_t) x9 * x45) + (0x2 * ((uint64_t) x11 * x43) + (0x2 * ((uint64_t) x13 * x41) + (0x2 * ((uint64_t) x15 * x39) + (0x2 * ((uint64_t) x17 * x37) + (uint64_t) x19 * x35)))))) + 0x19 * ((uint64_t) x21 * x62 + ((uint64_t) x23 * x63 + ((uint64_t) x25 * x61 + ((uint64_t) x27 * x59 + ((uint64_t) x29 * x57 + ((uint64_t) x31 * x55 + ((uint64_t) x33 * x53 + (uint64_t) x32 * x51)))))));
- uint64_t x73 = (uint64_t) x5 * x47 + (0x2 * ((uint64_t) x7 * x45) + (0x2 * ((uint64_t) x9 * x43) + (0x2 * ((uint64_t) x11 * x41) + (0x2 * ((uint64_t) x13 * x39) + (0x2 * ((uint64_t) x15 * x37) + (uint64_t) x17 * x35))))) + 0x19 * ((uint64_t) x19 * x62 + ((uint64_t) x21 * x63 + ((uint64_t) x23 * x61 + ((uint64_t) x25 * x59 + ((uint64_t) x27 * x57 + ((uint64_t) x29 * x55 + ((uint64_t) x31 * x53 + ((uint64_t) x33 * x51 + (uint64_t) x32 * x49))))))));
- uint64_t x74 = (uint64_t) x5 * x45 + (0x2 * ((uint64_t) x7 * x43) + (0x2 * ((uint64_t) x9 * x41) + (0x2 * ((uint64_t) x11 * x39) + (0x2 * ((uint64_t) x13 * x37) + (uint64_t) x15 * x35)))) + 0x19 * ((uint64_t) x17 * x62 + ((uint64_t) x19 * x63 + ((uint64_t) x21 * x61 + ((uint64_t) x23 * x59 + ((uint64_t) x25 * x57 + ((uint64_t) x27 * x55 + ((uint64_t) x29 * x53 + ((uint64_t) x31 * x51 + ((uint64_t) x33 * x49 + (uint64_t) x32 * x47)))))))));
- uint64_t x75 = (uint64_t) x5 * x43 + (0x2 * ((uint64_t) x7 * x41) + (0x2 * ((uint64_t) x9 * x39) + (0x2 * ((uint64_t) x11 * x37) + (uint64_t) x13 * x35))) + 0x19 * ((uint64_t) x15 * x62 + ((uint64_t) x17 * x63 + ((uint64_t) x19 * x61 + ((uint64_t) x21 * x59 + ((uint64_t) x23 * x57 + ((uint64_t) x25 * x55 + ((uint64_t) x27 * x53 + ((uint64_t) x29 * x51 + ((uint64_t) x31 * x49 + ((uint64_t) x33 * x47 + (uint64_t) x32 * x45))))))))));
- uint64_t x76 = (uint64_t) x5 * x41 + (0x2 * ((uint64_t) x7 * x39) + (0x2 * ((uint64_t) x9 * x37) + (uint64_t) x11 * x35)) + 0x19 * ((uint64_t) x13 * x62 + ((uint64_t) x15 * x63 + ((uint64_t) x17 * x61 + ((uint64_t) x19 * x59 + ((uint64_t) x21 * x57 + ((uint64_t) x23 * x55 + ((uint64_t) x25 * x53 + ((uint64_t) x27 * x51 + ((uint64_t) x29 * x49 + ((uint64_t) x31 * x47 + ((uint64_t) x33 * x45 + (uint64_t) x32 * x43)))))))))));
- uint64_t x77 = (uint64_t) x5 * x39 + (0x2 * ((uint64_t) x7 * x37) + (uint64_t) x9 * x35) + 0x19 * ((uint64_t) x11 * x62 + ((uint64_t) x13 * x63 + ((uint64_t) x15 * x61 + ((uint64_t) x17 * x59 + ((uint64_t) x19 * x57 + ((uint64_t) x21 * x55 + ((uint64_t) x23 * x53 + ((uint64_t) x25 * x51 + ((uint64_t) x27 * x49 + ((uint64_t) x29 * x47 + ((uint64_t) x31 * x45 + ((uint64_t) x33 * x43 + (uint64_t) x32 * x41))))))))))));
- uint64_t x78 = (uint64_t) x5 * x37 + (uint64_t) x7 * x35 + 0x19 * ((uint64_t) x9 * x62 + ((uint64_t) x11 * x63 + ((uint64_t) x13 * x61 + ((uint64_t) x15 * x59 + ((uint64_t) x17 * x57 + ((uint64_t) x19 * x55 + ((uint64_t) x21 * x53 + ((uint64_t) x23 * x51 + ((uint64_t) x25 * x49 + ((uint64_t) x27 * x47 + ((uint64_t) x29 * x45 + ((uint64_t) x31 * x43 + ((uint64_t) x33 * x41 + (uint64_t) x32 * x39)))))))))))));
- uint64_t x79 = (uint64_t) x5 * x35 + 0x19 * (0x2 * ((uint64_t) x7 * x62) + (0x2 * ((uint64_t) x9 * x63) + (0x2 * ((uint64_t) x11 * x61) + (0x2 * ((uint64_t) x13 * x59) + (0x2 * ((uint64_t) x15 * x57) + (0x2 * ((uint64_t) x17 * x55) + (0x2 * ((uint64_t) x19 * x53) + (0x2 * ((uint64_t) x21 * x51) + (0x2 * ((uint64_t) x23 * x49) + (0x2 * ((uint64_t) x25 * x47) + (0x2 * ((uint64_t) x27 * x45) + (0x2 * ((uint64_t) x29 * x43) + (0x2 * ((uint64_t) x31 * x41) + (0x2 * ((uint64_t) x33 * x39) + 0x2 * ((uint64_t) x32 * x37)))))))))))))));
- uint32_t x80 = (uint32_t) (x79 >> 0x18);
- uint32_t x81 = (uint32_t) x79 & 0xffffff;
- uint64_t x82 = x80 + x78;
- uint32_t x83 = (uint32_t) (x82 >> 0x17);
- uint32_t x84 = (uint32_t) x82 & 0x7fffff;
- uint64_t x85 = x83 + x77;
- uint32_t x86 = (uint32_t) (x85 >> 0x17);
- uint32_t x87 = (uint32_t) x85 & 0x7fffff;
- uint64_t x88 = x86 + x76;
- uint32_t x89 = (uint32_t) (x88 >> 0x17);
- uint32_t x90 = (uint32_t) x88 & 0x7fffff;
- uint64_t x91 = x89 + x75;
- uint32_t x92 = (uint32_t) (x91 >> 0x17);
- uint32_t x93 = (uint32_t) x91 & 0x7fffff;
- uint64_t x94 = x92 + x74;
- uint32_t x95 = (uint32_t) (x94 >> 0x17);
- uint32_t x96 = (uint32_t) x94 & 0x7fffff;
- uint64_t x97 = x95 + x73;
- uint32_t x98 = (uint32_t) (x97 >> 0x17);
- uint32_t x99 = (uint32_t) x97 & 0x7fffff;
- uint64_t x100 = x98 + x72;
- uint32_t x101 = (uint32_t) (x100 >> 0x17);
- uint32_t x102 = (uint32_t) x100 & 0x7fffff;
- uint64_t x103 = x101 + x71;
- uint32_t x104 = (uint32_t) (x103 >> 0x17);
- uint32_t x105 = (uint32_t) x103 & 0x7fffff;
- uint64_t x106 = x104 + x70;
- uint32_t x107 = (uint32_t) (x106 >> 0x17);
- uint32_t x108 = (uint32_t) x106 & 0x7fffff;
- uint64_t x109 = x107 + x69;
- uint32_t x110 = (uint32_t) (x109 >> 0x17);
- uint32_t x111 = (uint32_t) x109 & 0x7fffff;
- uint64_t x112 = x110 + x68;
- uint32_t x113 = (uint32_t) (x112 >> 0x17);
- uint32_t x114 = (uint32_t) x112 & 0x7fffff;
- uint64_t x115 = x113 + x67;
- uint32_t x116 = (uint32_t) (x115 >> 0x17);
- uint32_t x117 = (uint32_t) x115 & 0x7fffff;
- uint64_t x118 = x116 + x66;
- uint32_t x119 = (uint32_t) (x118 >> 0x17);
- uint32_t x120 = (uint32_t) x118 & 0x7fffff;
- uint64_t x121 = x119 + x65;
- uint32_t x122 = (uint32_t) (x121 >> 0x17);
- uint32_t x123 = (uint32_t) x121 & 0x7fffff;
- uint64_t x124 = x122 + x64;
+ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x19 * ((uint64_t)x32 * x62)));
+ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x19 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x19 * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x19 * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x19 * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x19 * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x19 * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x19 * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x19 * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x19 * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x19 * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x19 * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x19 * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x19 * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+ uint64_t x79 = (((uint64_t)x5 * x35) + (0x19 * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+ uint64_t x80 = (x79 >> 0x18);
+ uint32_t x81 = ((uint32_t)x79 & 0xffffff);
+ uint64_t x82 = (x80 + x78);
+ uint64_t x83 = (x82 >> 0x17);
+ uint32_t x84 = ((uint32_t)x82 & 0x7fffff);
+ uint64_t x85 = (x83 + x77);
+ uint64_t x86 = (x85 >> 0x17);
+ uint32_t x87 = ((uint32_t)x85 & 0x7fffff);
+ uint64_t x88 = (x86 + x76);
+ uint64_t x89 = (x88 >> 0x17);
+ uint32_t x90 = ((uint32_t)x88 & 0x7fffff);
+ uint64_t x91 = (x89 + x75);
+ uint64_t x92 = (x91 >> 0x17);
+ uint32_t x93 = ((uint32_t)x91 & 0x7fffff);
+ uint64_t x94 = (x92 + x74);
+ uint64_t x95 = (x94 >> 0x17);
+ uint32_t x96 = ((uint32_t)x94 & 0x7fffff);
+ uint64_t x97 = (x95 + x73);
+ uint64_t x98 = (x97 >> 0x17);
+ uint32_t x99 = ((uint32_t)x97 & 0x7fffff);
+ uint64_t x100 = (x98 + x72);
+ uint64_t x101 = (x100 >> 0x17);
+ uint32_t x102 = ((uint32_t)x100 & 0x7fffff);
+ uint64_t x103 = (x101 + x71);
+ uint64_t x104 = (x103 >> 0x17);
+ uint32_t x105 = ((uint32_t)x103 & 0x7fffff);
+ uint64_t x106 = (x104 + x70);
+ uint64_t x107 = (x106 >> 0x17);
+ uint32_t x108 = ((uint32_t)x106 & 0x7fffff);
+ uint64_t x109 = (x107 + x69);
+ uint64_t x110 = (x109 >> 0x17);
+ uint32_t x111 = ((uint32_t)x109 & 0x7fffff);
+ uint64_t x112 = (x110 + x68);
+ uint64_t x113 = (x112 >> 0x17);
+ uint32_t x114 = ((uint32_t)x112 & 0x7fffff);
+ uint64_t x115 = (x113 + x67);
+ uint64_t x116 = (x115 >> 0x17);
+ uint32_t x117 = ((uint32_t)x115 & 0x7fffff);
+ uint64_t x118 = (x116 + x66);
+ uint64_t x119 = (x118 >> 0x17);
+ uint32_t x120 = ((uint32_t)x118 & 0x7fffff);
+ uint64_t x121 = (x119 + x65);
+ uint64_t x122 = (x121 >> 0x17);
+ uint32_t x123 = ((uint32_t)x121 & 0x7fffff);
+ uint64_t x124 = (x122 + x64);
uint32_t x125 = (uint32_t) (x124 >> 0x17);
- uint32_t x126 = (uint32_t) x124 & 0x7fffff;
- uint64_t x127 = x81 + (uint64_t) 0x19 * x125;
+ uint32_t x126 = ((uint32_t)x124 & 0x7fffff);
+ uint64_t x127 = (x81 + ((uint64_t)0x19 * x125));
uint32_t x128 = (uint32_t) (x127 >> 0x18);
- uint32_t x129 = (uint32_t) x127 & 0xffffff;
- uint32_t x130 = x128 + x84;
- uint32_t x131 = x130 >> 0x17;
- uint32_t x132 = x130 & 0x7fffff;
- return (Return x126, Return x123, Return x120, Return x117, Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, x131 + x87, Return x132, Return x129))
+ uint32_t x129 = ((uint32_t)x127 & 0xffffff);
+ uint32_t x130 = (x128 + x84);
+ uint32_t x131 = (x130 >> 0x17);
+ uint32_t x132 = (x130 & 0x7fffff);
+ return (Return x126, Return x123, Return x120, Return x117, Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, (x131 + x87), Return x132, Return x129))
(x, x0)%core
: word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e369m25/fesquare.c b/src/Specific/solinas32_2e369m25/fesquare.c
new file mode 100644
index 000000000..d04a598ec
--- /dev/null
+++ b/src/Specific/solinas32_2e369m25/fesquare.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned integer via the GCC/Clang TI machine mode; not referenced anywhere else in this file.
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // intended to select GCC proper: Clang and ICC are excluded explicitly
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // map the intrinsic names directly onto GCC's builtins; the bug report linked above is the stated motivation
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // neither name is used by fesquare itself
+#endif
+
+#undef force_inline // discard any earlier definition (fesquare.h defines the same macro) before redefining it
+#define force_inline __attribute__((always_inline)) // GCC/Clang attribute syntax; no fallback for other compilers is provided here
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 16-limb value into out[0..15]. x2*x2 sits in the first-carried column and x2*x29 in the last, so x2 is presumably the least-significant limb and x29 the most; input bounds are not stated here -- confirm against the generator.
+{ uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2)))))))))))))))); // x31..x46 are the 16 product columns; x31 is the column carried last. The 0x2 factors presumably compensate for the uneven 24/23-bit limb widths -- TODO confirm.
+{ uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x19 * ((uint64_t)x29 * x29))); // 0x19 (= 25) scales the products folded back in from above the top limb; the carry widths below (24 + 15*23 = 369 bits) are consistent with reduction mod 2^369 - 25, as the directory name suggests.
+{ uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x19 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+{ uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x19 * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
+{ uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x19 * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
+{ uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x19 * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+{ uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x19 * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+{ uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x19 * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+{ uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x19 * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+{ uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x19 * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+{ uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x19 * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+{ uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
+{ uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
+{ uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+{ uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+{ uint64_t x46 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4)))))))))))))))))); // x46 is the column carried first: x2*x2 is its only term not scaled by 0x19.
+{ uint64_t x47 = (x46 >> 0x18); // Carry pass: the first column keeps 24 bits (shift 0x18, mask 0xffffff) ...
+{ uint32_t x48 = ((uint32_t)x46 & 0xffffff);
+{ uint64_t x49 = (x47 + x45);
+{ uint64_t x50 = (x49 >> 0x17); // ... and every following column keeps 23 bits (shift 0x17, mask 0x7fffff).
+{ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+{ uint64_t x52 = (x50 + x44);
+{ uint64_t x53 = (x52 >> 0x17);
+{ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+{ uint64_t x55 = (x53 + x43);
+{ uint64_t x56 = (x55 >> 0x17);
+{ uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
+{ uint64_t x58 = (x56 + x42);
+{ uint64_t x59 = (x58 >> 0x17);
+{ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+{ uint64_t x61 = (x59 + x41);
+{ uint64_t x62 = (x61 >> 0x17);
+{ uint32_t x63 = ((uint32_t)x61 & 0x7fffff);
+{ uint64_t x64 = (x62 + x40);
+{ uint64_t x65 = (x64 >> 0x17);
+{ uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
+{ uint64_t x67 = (x65 + x39);
+{ uint64_t x68 = (x67 >> 0x17);
+{ uint32_t x69 = ((uint32_t)x67 & 0x7fffff);
+{ uint64_t x70 = (x68 + x38);
+{ uint64_t x71 = (x70 >> 0x17);
+{ uint32_t x72 = ((uint32_t)x70 & 0x7fffff);
+{ uint64_t x73 = (x71 + x37);
+{ uint64_t x74 = (x73 >> 0x17);
+{ uint32_t x75 = ((uint32_t)x73 & 0x7fffff);
+{ uint64_t x76 = (x74 + x36);
+{ uint64_t x77 = (x76 >> 0x17);
+{ uint32_t x78 = ((uint32_t)x76 & 0x7fffff);
+{ uint64_t x79 = (x77 + x35);
+{ uint64_t x80 = (x79 >> 0x17);
+{ uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
+{ uint64_t x82 = (x80 + x34);
+{ uint64_t x83 = (x82 >> 0x17);
+{ uint32_t x84 = ((uint32_t)x82 & 0x7fffff);
+{ uint64_t x85 = (x83 + x33);
+{ uint64_t x86 = (x85 >> 0x17);
+{ uint32_t x87 = ((uint32_t)x85 & 0x7fffff);
+{ uint64_t x88 = (x86 + x32);
+{ uint64_t x89 = (x88 >> 0x17);
+{ uint32_t x90 = ((uint32_t)x88 & 0x7fffff);
+{ uint64_t x91 = (x89 + x31);
+{ uint32_t x92 = (uint32_t) (x91 >> 0x17); // Carry out of the last column, narrowed to 32 bits.
+{ uint32_t x93 = ((uint32_t)x91 & 0x7fffff);
+{ uint64_t x94 = (x48 + ((uint64_t)0x19 * x92)); // That carry re-enters the first limb multiplied by 0x19.
+{ uint32_t x95 = (uint32_t) (x94 >> 0x18);
+{ uint32_t x96 = ((uint32_t)x94 & 0xffffff);
+{ uint32_t x97 = (x95 + x51); // Short second carry pass: first limb -> second limb -> (via x98) third limb.
+{ uint32_t x98 = (x97 >> 0x17);
+{ uint32_t x99 = (x97 & 0x7fffff);
+out[0] = x93; // Output order mirrors the argument order: out[0] comes from the last-carried column, out[15] from the first.
+out[1] = x90;
+out[2] = x87;
+out[3] = x84;
+out[4] = x81;
+out[5] = x78;
+out[6] = x75;
+out[7] = x72;
+out[8] = x69;
+out[9] = x66;
+out[10] = x63;
+out[11] = x60;
+out[12] = x57;
+out[13] = x98 + x54; // x54 is already masked to 23 bits; the final carry x98 is added without re-masking.
+out[14] = x99;
+out[15] = x96;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // Closes the function body together with the one nested scope opened per declaration above.
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e369m25/fesquare.h b/src/Specific/solinas32_2e369m25/fesquare.h
new file mode 100644
index 000000000..c86247b3d
--- /dev/null
+++ b/src/Specific/solinas32_2e369m25/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the definition in fesquare.c, which writes out[0]..out[15]; that file's trailer gives the caller-side buffer as uint64_t out[16].
diff --git a/src/Specific/solinas32_2e369m25/fesquareDisplay.log b/src/Specific/solinas32_2e369m25/fesquareDisplay.log
index 2ea04c4f2..9c6b94bc1 100644
--- a/src/Specific/solinas32_2e369m25/fesquareDisplay.log
+++ b/src/Specific/solinas32_2e369m25/fesquareDisplay.log
@@ -2,75 +2,75 @@
Interp-η
(λ var : Syntax.base_type → Type,
λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
- uint64_t x31 = (uint64_t) x2 * x29 + (0x2 * ((uint64_t) x4 * x30) + (0x2 * ((uint64_t) x6 * x28) + (0x2 * ((uint64_t) x8 * x26) + (0x2 * ((uint64_t) x10 * x24) + (0x2 * ((uint64_t) x12 * x22) + (0x2 * ((uint64_t) x14 * x20) + (0x2 * ((uint64_t) x16 * x18) + (0x2 * ((uint64_t) x18 * x16) + (0x2 * ((uint64_t) x20 * x14) + (0x2 * ((uint64_t) x22 * x12) + (0x2 * ((uint64_t) x24 * x10) + (0x2 * ((uint64_t) x26 * x8) + (0x2 * ((uint64_t) x28 * x6) + (0x2 * ((uint64_t) x30 * x4) + (uint64_t) x29 * x2))))))))))))));
- uint64_t x32 = (uint64_t) x2 * x30 + (0x2 * ((uint64_t) x4 * x28) + (0x2 * ((uint64_t) x6 * x26) + (0x2 * ((uint64_t) x8 * x24) + (0x2 * ((uint64_t) x10 * x22) + (0x2 * ((uint64_t) x12 * x20) + (0x2 * ((uint64_t) x14 * x18) + (0x2 * ((uint64_t) x16 * x16) + (0x2 * ((uint64_t) x18 * x14) + (0x2 * ((uint64_t) x20 * x12) + (0x2 * ((uint64_t) x22 * x10) + (0x2 * ((uint64_t) x24 * x8) + (0x2 * ((uint64_t) x26 * x6) + (0x2 * ((uint64_t) x28 * x4) + (uint64_t) x30 * x2))))))))))))) + 0x19 * ((uint64_t) x29 * x29);
- uint64_t x33 = (uint64_t) x2 * x28 + (0x2 * ((uint64_t) x4 * x26) + (0x2 * ((uint64_t) x6 * x24) + (0x2 * ((uint64_t) x8 * x22) + (0x2 * ((uint64_t) x10 * x20) + (0x2 * ((uint64_t) x12 * x18) + (0x2 * ((uint64_t) x14 * x16) + (0x2 * ((uint64_t) x16 * x14) + (0x2 * ((uint64_t) x18 * x12) + (0x2 * ((uint64_t) x20 * x10) + (0x2 * ((uint64_t) x22 * x8) + (0x2 * ((uint64_t) x24 * x6) + (0x2 * ((uint64_t) x26 * x4) + (uint64_t) x28 * x2)))))))))))) + 0x19 * ((uint64_t) x30 * x29 + (uint64_t) x29 * x30);
- uint64_t x34 = (uint64_t) x2 * x26 + (0x2 * ((uint64_t) x4 * x24) + (0x2 * ((uint64_t) x6 * x22) + (0x2 * ((uint64_t) x8 * x20) + (0x2 * ((uint64_t) x10 * x18) + (0x2 * ((uint64_t) x12 * x16) + (0x2 * ((uint64_t) x14 * x14) + (0x2 * ((uint64_t) x16 * x12) + (0x2 * ((uint64_t) x18 * x10) + (0x2 * ((uint64_t) x20 * x8) + (0x2 * ((uint64_t) x22 * x6) + (0x2 * ((uint64_t) x24 * x4) + (uint64_t) x26 * x2))))))))))) + 0x19 * ((uint64_t) x28 * x29 + ((uint64_t) x30 * x30 + (uint64_t) x29 * x28));
- uint64_t x35 = (uint64_t) x2 * x24 + (0x2 * ((uint64_t) x4 * x22) + (0x2 * ((uint64_t) x6 * x20) + (0x2 * ((uint64_t) x8 * x18) + (0x2 * ((uint64_t) x10 * x16) + (0x2 * ((uint64_t) x12 * x14) + (0x2 * ((uint64_t) x14 * x12) + (0x2 * ((uint64_t) x16 * x10) + (0x2 * ((uint64_t) x18 * x8) + (0x2 * ((uint64_t) x20 * x6) + (0x2 * ((uint64_t) x22 * x4) + (uint64_t) x24 * x2)))))))))) + 0x19 * ((uint64_t) x26 * x29 + ((uint64_t) x28 * x30 + ((uint64_t) x30 * x28 + (uint64_t) x29 * x26)));
- uint64_t x36 = (uint64_t) x2 * x22 + (0x2 * ((uint64_t) x4 * x20) + (0x2 * ((uint64_t) x6 * x18) + (0x2 * ((uint64_t) x8 * x16) + (0x2 * ((uint64_t) x10 * x14) + (0x2 * ((uint64_t) x12 * x12) + (0x2 * ((uint64_t) x14 * x10) + (0x2 * ((uint64_t) x16 * x8) + (0x2 * ((uint64_t) x18 * x6) + (0x2 * ((uint64_t) x20 * x4) + (uint64_t) x22 * x2))))))))) + 0x19 * ((uint64_t) x24 * x29 + ((uint64_t) x26 * x30 + ((uint64_t) x28 * x28 + ((uint64_t) x30 * x26 + (uint64_t) x29 * x24))));
- uint64_t x37 = (uint64_t) x2 * x20 + (0x2 * ((uint64_t) x4 * x18) + (0x2 * ((uint64_t) x6 * x16) + (0x2 * ((uint64_t) x8 * x14) + (0x2 * ((uint64_t) x10 * x12) + (0x2 * ((uint64_t) x12 * x10) + (0x2 * ((uint64_t) x14 * x8) + (0x2 * ((uint64_t) x16 * x6) + (0x2 * ((uint64_t) x18 * x4) + (uint64_t) x20 * x2)))))))) + 0x19 * ((uint64_t) x22 * x29 + ((uint64_t) x24 * x30 + ((uint64_t) x26 * x28 + ((uint64_t) x28 * x26 + ((uint64_t) x30 * x24 + (uint64_t) x29 * x22)))));
- uint64_t x38 = (uint64_t) x2 * x18 + (0x2 * ((uint64_t) x4 * x16) + (0x2 * ((uint64_t) x6 * x14) + (0x2 * ((uint64_t) x8 * x12) + (0x2 * ((uint64_t) x10 * x10) + (0x2 * ((uint64_t) x12 * x8) + (0x2 * ((uint64_t) x14 * x6) + (0x2 * ((uint64_t) x16 * x4) + (uint64_t) x18 * x2))))))) + 0x19 * ((uint64_t) x20 * x29 + ((uint64_t) x22 * x30 + ((uint64_t) x24 * x28 + ((uint64_t) x26 * x26 + ((uint64_t) x28 * x24 + ((uint64_t) x30 * x22 + (uint64_t) x29 * x20))))));
- uint64_t x39 = (uint64_t) x2 * x16 + (0x2 * ((uint64_t) x4 * x14) + (0x2 * ((uint64_t) x6 * x12) + (0x2 * ((uint64_t) x8 * x10) + (0x2 * ((uint64_t) x10 * x8) + (0x2 * ((uint64_t) x12 * x6) + (0x2 * ((uint64_t) x14 * x4) + (uint64_t) x16 * x2)))))) + 0x19 * ((uint64_t) x18 * x29 + ((uint64_t) x20 * x30 + ((uint64_t) x22 * x28 + ((uint64_t) x24 * x26 + ((uint64_t) x26 * x24 + ((uint64_t) x28 * x22 + ((uint64_t) x30 * x20 + (uint64_t) x29 * x18)))))));
- uint64_t x40 = (uint64_t) x2 * x14 + (0x2 * ((uint64_t) x4 * x12) + (0x2 * ((uint64_t) x6 * x10) + (0x2 * ((uint64_t) x8 * x8) + (0x2 * ((uint64_t) x10 * x6) + (0x2 * ((uint64_t) x12 * x4) + (uint64_t) x14 * x2))))) + 0x19 * ((uint64_t) x16 * x29 + ((uint64_t) x18 * x30 + ((uint64_t) x20 * x28 + ((uint64_t) x22 * x26 + ((uint64_t) x24 * x24 + ((uint64_t) x26 * x22 + ((uint64_t) x28 * x20 + ((uint64_t) x30 * x18 + (uint64_t) x29 * x16))))))));
- uint64_t x41 = (uint64_t) x2 * x12 + (0x2 * ((uint64_t) x4 * x10) + (0x2 * ((uint64_t) x6 * x8) + (0x2 * ((uint64_t) x8 * x6) + (0x2 * ((uint64_t) x10 * x4) + (uint64_t) x12 * x2)))) + 0x19 * ((uint64_t) x14 * x29 + ((uint64_t) x16 * x30 + ((uint64_t) x18 * x28 + ((uint64_t) x20 * x26 + ((uint64_t) x22 * x24 + ((uint64_t) x24 * x22 + ((uint64_t) x26 * x20 + ((uint64_t) x28 * x18 + ((uint64_t) x30 * x16 + (uint64_t) x29 * x14)))))))));
- uint64_t x42 = (uint64_t) x2 * x10 + (0x2 * ((uint64_t) x4 * x8) + (0x2 * ((uint64_t) x6 * x6) + (0x2 * ((uint64_t) x8 * x4) + (uint64_t) x10 * x2))) + 0x19 * ((uint64_t) x12 * x29 + ((uint64_t) x14 * x30 + ((uint64_t) x16 * x28 + ((uint64_t) x18 * x26 + ((uint64_t) x20 * x24 + ((uint64_t) x22 * x22 + ((uint64_t) x24 * x20 + ((uint64_t) x26 * x18 + ((uint64_t) x28 * x16 + ((uint64_t) x30 * x14 + (uint64_t) x29 * x12))))))))));
- uint64_t x43 = (uint64_t) x2 * x8 + (0x2 * ((uint64_t) x4 * x6) + (0x2 * ((uint64_t) x6 * x4) + (uint64_t) x8 * x2)) + 0x19 * ((uint64_t) x10 * x29 + ((uint64_t) x12 * x30 + ((uint64_t) x14 * x28 + ((uint64_t) x16 * x26 + ((uint64_t) x18 * x24 + ((uint64_t) x20 * x22 + ((uint64_t) x22 * x20 + ((uint64_t) x24 * x18 + ((uint64_t) x26 * x16 + ((uint64_t) x28 * x14 + ((uint64_t) x30 * x12 + (uint64_t) x29 * x10)))))))))));
- uint64_t x44 = (uint64_t) x2 * x6 + (0x2 * ((uint64_t) x4 * x4) + (uint64_t) x6 * x2) + 0x19 * ((uint64_t) x8 * x29 + ((uint64_t) x10 * x30 + ((uint64_t) x12 * x28 + ((uint64_t) x14 * x26 + ((uint64_t) x16 * x24 + ((uint64_t) x18 * x22 + ((uint64_t) x20 * x20 + ((uint64_t) x22 * x18 + ((uint64_t) x24 * x16 + ((uint64_t) x26 * x14 + ((uint64_t) x28 * x12 + ((uint64_t) x30 * x10 + (uint64_t) x29 * x8))))))))))));
- uint64_t x45 = (uint64_t) x2 * x4 + (uint64_t) x4 * x2 + 0x19 * ((uint64_t) x6 * x29 + ((uint64_t) x8 * x30 + ((uint64_t) x10 * x28 + ((uint64_t) x12 * x26 + ((uint64_t) x14 * x24 + ((uint64_t) x16 * x22 + ((uint64_t) x18 * x20 + ((uint64_t) x20 * x18 + ((uint64_t) x22 * x16 + ((uint64_t) x24 * x14 + ((uint64_t) x26 * x12 + ((uint64_t) x28 * x10 + ((uint64_t) x30 * x8 + (uint64_t) x29 * x6)))))))))))));
- uint64_t x46 = (uint64_t) x2 * x2 + 0x19 * (0x2 * ((uint64_t) x4 * x29) + (0x2 * ((uint64_t) x6 * x30) + (0x2 * ((uint64_t) x8 * x28) + (0x2 * ((uint64_t) x10 * x26) + (0x2 * ((uint64_t) x12 * x24) + (0x2 * ((uint64_t) x14 * x22) + (0x2 * ((uint64_t) x16 * x20) + (0x2 * ((uint64_t) x18 * x18) + (0x2 * ((uint64_t) x20 * x16) + (0x2 * ((uint64_t) x22 * x14) + (0x2 * ((uint64_t) x24 * x12) + (0x2 * ((uint64_t) x26 * x10) + (0x2 * ((uint64_t) x28 * x8) + (0x2 * ((uint64_t) x30 * x6) + 0x2 * ((uint64_t) x29 * x4)))))))))))))));
- uint32_t x47 = (uint32_t) (x46 >> 0x18);
- uint32_t x48 = (uint32_t) x46 & 0xffffff;
- uint64_t x49 = x47 + x45;
- uint32_t x50 = (uint32_t) (x49 >> 0x17);
- uint32_t x51 = (uint32_t) x49 & 0x7fffff;
- uint64_t x52 = x50 + x44;
- uint32_t x53 = (uint32_t) (x52 >> 0x17);
- uint32_t x54 = (uint32_t) x52 & 0x7fffff;
- uint64_t x55 = x53 + x43;
- uint32_t x56 = (uint32_t) (x55 >> 0x17);
- uint32_t x57 = (uint32_t) x55 & 0x7fffff;
- uint64_t x58 = x56 + x42;
- uint32_t x59 = (uint32_t) (x58 >> 0x17);
- uint32_t x60 = (uint32_t) x58 & 0x7fffff;
- uint64_t x61 = x59 + x41;
- uint32_t x62 = (uint32_t) (x61 >> 0x17);
- uint32_t x63 = (uint32_t) x61 & 0x7fffff;
- uint64_t x64 = x62 + x40;
- uint32_t x65 = (uint32_t) (x64 >> 0x17);
- uint32_t x66 = (uint32_t) x64 & 0x7fffff;
- uint64_t x67 = x65 + x39;
- uint32_t x68 = (uint32_t) (x67 >> 0x17);
- uint32_t x69 = (uint32_t) x67 & 0x7fffff;
- uint64_t x70 = x68 + x38;
- uint32_t x71 = (uint32_t) (x70 >> 0x17);
- uint32_t x72 = (uint32_t) x70 & 0x7fffff;
- uint64_t x73 = x71 + x37;
- uint32_t x74 = (uint32_t) (x73 >> 0x17);
- uint32_t x75 = (uint32_t) x73 & 0x7fffff;
- uint64_t x76 = x74 + x36;
- uint32_t x77 = (uint32_t) (x76 >> 0x17);
- uint32_t x78 = (uint32_t) x76 & 0x7fffff;
- uint64_t x79 = x77 + x35;
- uint32_t x80 = (uint32_t) (x79 >> 0x17);
- uint32_t x81 = (uint32_t) x79 & 0x7fffff;
- uint64_t x82 = x80 + x34;
- uint32_t x83 = (uint32_t) (x82 >> 0x17);
- uint32_t x84 = (uint32_t) x82 & 0x7fffff;
- uint64_t x85 = x83 + x33;
- uint32_t x86 = (uint32_t) (x85 >> 0x17);
- uint32_t x87 = (uint32_t) x85 & 0x7fffff;
- uint64_t x88 = x86 + x32;
- uint32_t x89 = (uint32_t) (x88 >> 0x17);
- uint32_t x90 = (uint32_t) x88 & 0x7fffff;
- uint64_t x91 = x89 + x31;
+ uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
+ uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x19 * ((uint64_t)x29 * x29)));
+ uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x19 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+ uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x19 * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
+ uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x19 * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
+ uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x19 * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+ uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x19 * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x19 * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+ uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x19 * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+ uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x19 * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+ uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x19 * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+ uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x19 * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
+ uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x19 * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
+ uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x19 * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+ uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x19 * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+ uint64_t x46 = (((uint64_t)x2 * x2) + (0x19 * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
+ uint64_t x47 = (x46 >> 0x18);
+ uint32_t x48 = ((uint32_t)x46 & 0xffffff);
+ uint64_t x49 = (x47 + x45);
+ uint64_t x50 = (x49 >> 0x17);
+ uint32_t x51 = ((uint32_t)x49 & 0x7fffff);
+ uint64_t x52 = (x50 + x44);
+ uint64_t x53 = (x52 >> 0x17);
+ uint32_t x54 = ((uint32_t)x52 & 0x7fffff);
+ uint64_t x55 = (x53 + x43);
+ uint64_t x56 = (x55 >> 0x17);
+ uint32_t x57 = ((uint32_t)x55 & 0x7fffff);
+ uint64_t x58 = (x56 + x42);
+ uint64_t x59 = (x58 >> 0x17);
+ uint32_t x60 = ((uint32_t)x58 & 0x7fffff);
+ uint64_t x61 = (x59 + x41);
+ uint64_t x62 = (x61 >> 0x17);
+ uint32_t x63 = ((uint32_t)x61 & 0x7fffff);
+ uint64_t x64 = (x62 + x40);
+ uint64_t x65 = (x64 >> 0x17);
+ uint32_t x66 = ((uint32_t)x64 & 0x7fffff);
+ uint64_t x67 = (x65 + x39);
+ uint64_t x68 = (x67 >> 0x17);
+ uint32_t x69 = ((uint32_t)x67 & 0x7fffff);
+ uint64_t x70 = (x68 + x38);
+ uint64_t x71 = (x70 >> 0x17);
+ uint32_t x72 = ((uint32_t)x70 & 0x7fffff);
+ uint64_t x73 = (x71 + x37);
+ uint64_t x74 = (x73 >> 0x17);
+ uint32_t x75 = ((uint32_t)x73 & 0x7fffff);
+ uint64_t x76 = (x74 + x36);
+ uint64_t x77 = (x76 >> 0x17);
+ uint32_t x78 = ((uint32_t)x76 & 0x7fffff);
+ uint64_t x79 = (x77 + x35);
+ uint64_t x80 = (x79 >> 0x17);
+ uint32_t x81 = ((uint32_t)x79 & 0x7fffff);
+ uint64_t x82 = (x80 + x34);
+ uint64_t x83 = (x82 >> 0x17);
+ uint32_t x84 = ((uint32_t)x82 & 0x7fffff);
+ uint64_t x85 = (x83 + x33);
+ uint64_t x86 = (x85 >> 0x17);
+ uint32_t x87 = ((uint32_t)x85 & 0x7fffff);
+ uint64_t x88 = (x86 + x32);
+ uint64_t x89 = (x88 >> 0x17);
+ uint32_t x90 = ((uint32_t)x88 & 0x7fffff);
+ uint64_t x91 = (x89 + x31);
uint32_t x92 = (uint32_t) (x91 >> 0x17);
- uint32_t x93 = (uint32_t) x91 & 0x7fffff;
- uint64_t x94 = x48 + (uint64_t) 0x19 * x92;
+ uint32_t x93 = ((uint32_t)x91 & 0x7fffff);
+ uint64_t x94 = (x48 + ((uint64_t)0x19 * x92));
uint32_t x95 = (uint32_t) (x94 >> 0x18);
- uint32_t x96 = (uint32_t) x94 & 0xffffff;
- uint32_t x97 = x95 + x51;
- uint32_t x98 = x97 >> 0x17;
- uint32_t x99 = x97 & 0x7fffff;
- return (Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, Return x72, Return x69, Return x66, Return x63, Return x60, Return x57, x98 + x54, Return x99, Return x96))
+ uint32_t x96 = ((uint32_t)x94 & 0xffffff);
+ uint32_t x97 = (x95 + x51);
+ uint32_t x98 = (x97 >> 0x17);
+ uint32_t x99 = (x97 & 0x7fffff);
+ return (Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, Return x72, Return x69, Return x66, Return x63, Return x60, Return x57, (x98 + x54), Return x99, Return x96))
x
: word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e369m25/freeze.c b/src/Specific/solinas32_2e369m25/freeze.c
new file mode 100644
index 000000000..581df79d9
--- /dev/null
+++ b/src/Specific/solinas32_2e369m25/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x32;
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffe7;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e369m25/freeze.h b/src/Specific/solinas32_2e369m25/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e369m25/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e369m25/freezeDisplay.log b/src/Specific/solinas32_2e369m25/freezeDisplay.log
index 1989df374..ecff7c235 100644
--- a/src/Specific/solinas32_2e369m25/freezeDisplay.log
+++ b/src/Specific/solinas32_2e369m25/freezeDisplay.log
@@ -19,37 +19,37 @@ Interp-η
uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0x7fffff);
uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0x7fffff);
uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
- uint32_t x80 = x79 & 0xffffe7;
+ uint32_t x80 = (x79 & 0xffffe7);
uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
- uint32_t x84 = x79 & 0x7fffff;
+ uint32_t x84 = (x79 & 0x7fffff);
uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
- uint32_t x88 = x79 & 0x7fffff;
+ uint32_t x88 = (x79 & 0x7fffff);
uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
- uint32_t x92 = x79 & 0x7fffff;
+ uint32_t x92 = (x79 & 0x7fffff);
uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
- uint32_t x96 = x79 & 0x7fffff;
+ uint32_t x96 = (x79 & 0x7fffff);
uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
- uint32_t x100 = x79 & 0x7fffff;
+ uint32_t x100 = (x79 & 0x7fffff);
uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
- uint32_t x104 = x79 & 0x7fffff;
+ uint32_t x104 = (x79 & 0x7fffff);
uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
- uint32_t x108 = x79 & 0x7fffff;
+ uint32_t x108 = (x79 & 0x7fffff);
uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
- uint32_t x112 = x79 & 0x7fffff;
+ uint32_t x112 = (x79 & 0x7fffff);
uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
- uint32_t x116 = x79 & 0x7fffff;
+ uint32_t x116 = (x79 & 0x7fffff);
uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
- uint32_t x120 = x79 & 0x7fffff;
+ uint32_t x120 = (x79 & 0x7fffff);
uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
- uint32_t x124 = x79 & 0x7fffff;
+ uint32_t x124 = (x79 & 0x7fffff);
uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
- uint32_t x128 = x79 & 0x7fffff;
+ uint32_t x128 = (x79 & 0x7fffff);
uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
- uint32_t x132 = x79 & 0x7fffff;
+ uint32_t x132 = (x79 & 0x7fffff);
uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
- uint32_t x136 = x79 & 0x7fffff;
+ uint32_t x136 = (x79 & 0x7fffff);
uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
- uint32_t x140 = x79 & 0x7fffff;
+ uint32_t x140 = (x79 & 0x7fffff);
uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 23 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
(Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
x
diff --git a/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c b/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c
new file mode 100644
index 000000000..200b9760d
--- /dev/null
+++ b/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x32;
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffff;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.h b/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freezeDisplay.log b/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freezeDisplay.log
index 5f8d74511..d5dad97fa 100644
--- a/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freezeDisplay.log
+++ b/src/Specific/solinas32_2e384m2e128m2e96p2e32m1/freezeDisplay.log
@@ -19,35 +19,35 @@ Interp-η
uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xffffff);
uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xffffff);
uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
- uint32_t x80 = x79 & 0xffffff;
+ uint32_t x80 = (x79 & 0xffffff);
uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
- uint8_t x84 = (uint8_t) x79 & 0xff;
+ uint8_t x84 = ((uint8_t)x79 & 0xff);
uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
uint32_t x89, uint8_t x90 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, 0x0);
uint32_t x92, uint8_t x93 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x90, Return x41, 0x0);
- uint32_t x94 = x79 & 0xffffff;
+ uint32_t x94 = (x79 & 0xffffff);
uint32_t x96, uint8_t x97 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x93, Return x44, Return x94);
- uint32_t x98 = x79 & 0xfffeff;
+ uint32_t x98 = (x79 & 0xfffeff);
uint32_t x100, uint8_t x101 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x97, Return x47, Return x98);
- uint32_t x102 = x79 & 0xffffff;
+ uint32_t x102 = (x79 & 0xffffff);
uint32_t x104, uint8_t x105 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x101, Return x50, Return x102);
- uint32_t x106 = x79 & 0xffffff;
+ uint32_t x106 = (x79 & 0xffffff);
uint32_t x108, uint8_t x109 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x105, Return x53, Return x106);
- uint32_t x110 = x79 & 0xffffff;
+ uint32_t x110 = (x79 & 0xffffff);
uint32_t x112, uint8_t x113 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x109, Return x56, Return x110);
- uint32_t x114 = x79 & 0xffffff;
+ uint32_t x114 = (x79 & 0xffffff);
uint32_t x116, uint8_t x117 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x113, Return x59, Return x114);
- uint32_t x118 = x79 & 0xffffff;
+ uint32_t x118 = (x79 & 0xffffff);
uint32_t x120, uint8_t x121 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x117, Return x62, Return x118);
- uint32_t x122 = x79 & 0xffffff;
+ uint32_t x122 = (x79 & 0xffffff);
uint32_t x124, uint8_t x125 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x121, Return x65, Return x122);
- uint32_t x126 = x79 & 0xffffff;
+ uint32_t x126 = (x79 & 0xffffff);
uint32_t x128, uint8_t x129 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x125, Return x68, Return x126);
- uint32_t x130 = x79 & 0xffffff;
+ uint32_t x130 = (x79 & 0xffffff);
uint32_t x132, uint8_t x133 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x129, Return x71, Return x130);
- uint32_t x134 = x79 & 0xffffff;
+ uint32_t x134 = (x79 & 0xffffff);
uint32_t x136, uint8_t x137 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x133, Return x74, Return x134);
- uint32_t x138 = x79 & 0xffffff;
+ uint32_t x138 = (x79 & 0xffffff);
uint32_t x140, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x137, Return x77, Return x138);
(Return x140, Return x136, Return x132, Return x128, Return x124, Return x120, Return x116, Return x112, Return x108, Return x104, Return x100, Return x96, Return x92, Return x89, Return x86, Return x82))
x
diff --git a/src/Specific/solinas32_2e384m5x2e368m1/freeze.c b/src/Specific/solinas32_2e384m5x2e368m1/freeze.c
new file mode 100644
index 000000000..200b9760d
--- /dev/null
+++ b/src/Specific/solinas32_2e384m5x2e368m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x32;
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffff;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e384m5x2e368m1/freeze.h b/src/Specific/solinas32_2e384m5x2e368m1/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e384m5x2e368m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e384m5x2e368m1/freezeDisplay.log b/src/Specific/solinas32_2e384m5x2e368m1/freezeDisplay.log
new file mode 100644
index 000000000..c3fb8b3a9
--- /dev/null
+++ b/src/Specific/solinas32_2e384m5x2e368m1/freezeDisplay.log
@@ -0,0 +1,56 @@
+λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff);
+ uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0xffffff);
+ uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0xffffff);
+ uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0xffffff);
+ uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0xffffff);
+ uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xffffff);
+ uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xffffff);
+ uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xffffff);
+ uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0xffffff);
+ uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xffffff);
+ uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xffffff);
+ uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xffffff);
+ uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0xffffff);
+ uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xffffff);
+ uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xffffff);
+ uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, Const 16775935);
+ uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ uint32_t x80 = (x79 & 0xffffff);
+ uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ uint32_t x84 = (x79 & 0xffffff);
+ uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ uint32_t x88 = (x79 & 0xffffff);
+ uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ uint32_t x92 = (x79 & 0xffffff);
+ uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ uint32_t x96 = (x79 & 0xffffff);
+ uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ uint32_t x100 = (x79 & 0xffffff);
+ uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ uint32_t x104 = (x79 & 0xffffff);
+ uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ uint32_t x108 = (x79 & 0xffffff);
+ uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ uint32_t x112 = (x79 & 0xffffff);
+ uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ uint32_t x116 = (x79 & 0xffffff);
+ uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ uint32_t x120 = (x79 & 0xffffff);
+ uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ uint32_t x124 = (x79 & 0xffffff);
+ uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ uint32_t x128 = (x79 & 0xffffff);
+ uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ uint32_t x132 = (x79 & 0xffffff);
+ uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ uint32_t x136 = (x79 & 0xffffff);
+ uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ uint32_t x140 = (x79 & Const 16775935);
+ uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ (Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
+x
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e384m79x2e376m1/freeze.c b/src/Specific/solinas32_2e384m79x2e376m1/freeze.c
new file mode 100644
index 000000000..200b9760d
--- /dev/null
+++ b/src/Specific/solinas32_2e384m79x2e376m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x32;
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 24 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffff;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e384m79x2e376m1/freeze.h b/src/Specific/solinas32_2e384m79x2e376m1/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e384m79x2e376m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e384m79x2e376m1/freezeDisplay.log b/src/Specific/solinas32_2e384m79x2e376m1/freezeDisplay.log
new file mode 100644
index 000000000..f8e788dd9
--- /dev/null
+++ b/src/Specific/solinas32_2e384m79x2e376m1/freezeDisplay.log
@@ -0,0 +1,56 @@
+λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0xffffff);
+ uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0xffffff);
+ uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0xffffff);
+ uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0xffffff);
+ uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0xffffff);
+ uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xffffff);
+ uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xffffff);
+ uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xffffff);
+ uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0xffffff);
+ uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xffffff);
+ uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xffffff);
+ uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xffffff);
+ uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0xffffff);
+ uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xffffff);
+ uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xffffff);
+ uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, Const 11599871);
+ uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ uint32_t x80 = (x79 & 0xffffff);
+ uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ uint32_t x84 = (x79 & 0xffffff);
+ uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ uint32_t x88 = (x79 & 0xffffff);
+ uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ uint32_t x92 = (x79 & 0xffffff);
+ uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ uint32_t x96 = (x79 & 0xffffff);
+ uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ uint32_t x100 = (x79 & 0xffffff);
+ uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ uint32_t x104 = (x79 & 0xffffff);
+ uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ uint32_t x108 = (x79 & 0xffffff);
+ uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ uint32_t x112 = (x79 & 0xffffff);
+ uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ uint32_t x116 = (x79 & 0xffffff);
+ uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ uint32_t x120 = (x79 & 0xffffff);
+ uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ uint32_t x124 = (x79 & 0xffffff);
+ uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ uint32_t x128 = (x79 & 0xffffff);
+ uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ uint32_t x132 = (x79 & 0xffffff);
+ uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ uint32_t x136 = (x79 & 0xffffff);
+ uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ uint32_t x140 = (x79 & Const 11599871);
+ uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 24 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ (Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
+x
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e401m31/femul.c b/src/Specific/solinas32_2e401m31/femul.c
new file mode 100644
index 000000000..df691305f
--- /dev/null
+++ b/src/Specific/solinas32_2e401m31/femul.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
+{ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+{ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x1f * ((uint64_t)x32 * x62)));
+{ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x1f * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+{ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x1f * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+{ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x1f * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+{ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x1f * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+{ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x1f * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+{ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x1f * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+{ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x1f * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+{ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x1f * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+{ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x1f * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+{ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x1f * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+{ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x1f * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+{ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x1f * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+{ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x1f * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+{ uint64_t x79 = (((uint64_t)x5 * x35) + (0x1f * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+{ uint64_t x80 = (x79 >> 0x1a);
+{ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+{ uint64_t x82 = (x80 + x78);
+{ uint64_t x83 = (x82 >> 0x19);
+{ uint32_t x84 = ((uint32_t)x82 & 0x1ffffff);
+{ uint64_t x85 = (x83 + x77);
+{ uint64_t x86 = (x85 >> 0x19);
+{ uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
+{ uint64_t x88 = (x86 + x76);
+{ uint64_t x89 = (x88 >> 0x19);
+{ uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
+{ uint64_t x91 = (x89 + x75);
+{ uint64_t x92 = (x91 >> 0x19);
+{ uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
+{ uint64_t x94 = (x92 + x74);
+{ uint64_t x95 = (x94 >> 0x19);
+{ uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
+{ uint64_t x97 = (x95 + x73);
+{ uint64_t x98 = (x97 >> 0x19);
+{ uint32_t x99 = ((uint32_t)x97 & 0x1ffffff);
+{ uint64_t x100 = (x98 + x72);
+{ uint64_t x101 = (x100 >> 0x19);
+{ uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
+{ uint64_t x103 = (x101 + x71);
+{ uint64_t x104 = (x103 >> 0x19);
+{ uint32_t x105 = ((uint32_t)x103 & 0x1ffffff);
+{ uint64_t x106 = (x104 + x70);
+{ uint64_t x107 = (x106 >> 0x19);
+{ uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
+{ uint64_t x109 = (x107 + x69);
+{ uint64_t x110 = (x109 >> 0x19);
+{ uint32_t x111 = ((uint32_t)x109 & 0x1ffffff);
+{ uint64_t x112 = (x110 + x68);
+{ uint64_t x113 = (x112 >> 0x19);
+{ uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
+{ uint64_t x115 = (x113 + x67);
+{ uint64_t x116 = (x115 >> 0x19);
+{ uint32_t x117 = ((uint32_t)x115 & 0x1ffffff);
+{ uint64_t x118 = (x116 + x66);
+{ uint64_t x119 = (x118 >> 0x19);
+{ uint32_t x120 = ((uint32_t)x118 & 0x1ffffff);
+{ uint64_t x121 = (x119 + x65);
+{ uint64_t x122 = (x121 >> 0x19);
+{ uint32_t x123 = ((uint32_t)x121 & 0x1ffffff);
+{ uint64_t x124 = (x122 + x64);
+{ uint64_t x125 = (x124 >> 0x19);
+{ uint32_t x126 = ((uint32_t)x124 & 0x1ffffff);
+{ uint64_t x127 = (x81 + (0x1f * x125));
+{ uint32_t x128 = (uint32_t) (x127 >> 0x1a);
+{ uint32_t x129 = ((uint32_t)x127 & 0x3ffffff);
+{ uint32_t x130 = (x128 + x84);
+{ uint32_t x131 = (x130 >> 0x19);
+{ uint32_t x132 = (x130 & 0x1ffffff);
+out[0] = x126;
+out[1] = x123;
+out[2] = x120;
+out[3] = x117;
+out[4] = x114;
+out[5] = x111;
+out[6] = x108;
+out[7] = x105;
+out[8] = x102;
+out[9] = x99;
+out[10] = x96;
+out[11] = x93;
+out[12] = x90;
+out[13] = x131 + x87;
+out[14] = x132;
+out[15] = x129;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e401m31/femul.h b/src/Specific/solinas32_2e401m31/femul.h
new file mode 100644
index 000000000..c4089fc7d
--- /dev/null
+++ b/src/Specific/solinas32_2e401m31/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35);
diff --git a/src/Specific/solinas32_2e401m31/femulDisplay.log b/src/Specific/solinas32_2e401m31/femulDisplay.log
index 7ac5e062b..5d597ba6a 100644
--- a/src/Specific/solinas32_2e401m31/femulDisplay.log
+++ b/src/Specific/solinas32_2e401m31/femulDisplay.log
@@ -2,75 +2,75 @@
Interp-η
(λ var : Syntax.base_type → Type,
λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
- uint64_t x64 = (uint64_t) x5 * x62 + (0x2 * ((uint64_t) x7 * x63) + (0x2 * ((uint64_t) x9 * x61) + (0x2 * ((uint64_t) x11 * x59) + (0x2 * ((uint64_t) x13 * x57) + (0x2 * ((uint64_t) x15 * x55) + (0x2 * ((uint64_t) x17 * x53) + (0x2 * ((uint64_t) x19 * x51) + (0x2 * ((uint64_t) x21 * x49) + (0x2 * ((uint64_t) x23 * x47) + (0x2 * ((uint64_t) x25 * x45) + (0x2 * ((uint64_t) x27 * x43) + (0x2 * ((uint64_t) x29 * x41) + (0x2 * ((uint64_t) x31 * x39) + (0x2 * ((uint64_t) x33 * x37) + (uint64_t) x32 * x35))))))))))))));
- uint64_t x65 = (uint64_t) x5 * x63 + (0x2 * ((uint64_t) x7 * x61) + (0x2 * ((uint64_t) x9 * x59) + (0x2 * ((uint64_t) x11 * x57) + (0x2 * ((uint64_t) x13 * x55) + (0x2 * ((uint64_t) x15 * x53) + (0x2 * ((uint64_t) x17 * x51) + (0x2 * ((uint64_t) x19 * x49) + (0x2 * ((uint64_t) x21 * x47) + (0x2 * ((uint64_t) x23 * x45) + (0x2 * ((uint64_t) x25 * x43) + (0x2 * ((uint64_t) x27 * x41) + (0x2 * ((uint64_t) x29 * x39) + (0x2 * ((uint64_t) x31 * x37) + (uint64_t) x33 * x35))))))))))))) + 0x1f * ((uint64_t) x32 * x62);
- uint64_t x66 = (uint64_t) x5 * x61 + (0x2 * ((uint64_t) x7 * x59) + (0x2 * ((uint64_t) x9 * x57) + (0x2 * ((uint64_t) x11 * x55) + (0x2 * ((uint64_t) x13 * x53) + (0x2 * ((uint64_t) x15 * x51) + (0x2 * ((uint64_t) x17 * x49) + (0x2 * ((uint64_t) x19 * x47) + (0x2 * ((uint64_t) x21 * x45) + (0x2 * ((uint64_t) x23 * x43) + (0x2 * ((uint64_t) x25 * x41) + (0x2 * ((uint64_t) x27 * x39) + (0x2 * ((uint64_t) x29 * x37) + (uint64_t) x31 * x35)))))))))))) + 0x1f * ((uint64_t) x33 * x62 + (uint64_t) x32 * x63);
- uint64_t x67 = (uint64_t) x5 * x59 + (0x2 * ((uint64_t) x7 * x57) + (0x2 * ((uint64_t) x9 * x55) + (0x2 * ((uint64_t) x11 * x53) + (0x2 * ((uint64_t) x13 * x51) + (0x2 * ((uint64_t) x15 * x49) + (0x2 * ((uint64_t) x17 * x47) + (0x2 * ((uint64_t) x19 * x45) + (0x2 * ((uint64_t) x21 * x43) + (0x2 * ((uint64_t) x23 * x41) + (0x2 * ((uint64_t) x25 * x39) + (0x2 * ((uint64_t) x27 * x37) + (uint64_t) x29 * x35))))))))))) + 0x1f * ((uint64_t) x31 * x62 + ((uint64_t) x33 * x63 + (uint64_t) x32 * x61));
- uint64_t x68 = (uint64_t) x5 * x57 + (0x2 * ((uint64_t) x7 * x55) + (0x2 * ((uint64_t) x9 * x53) + (0x2 * ((uint64_t) x11 * x51) + (0x2 * ((uint64_t) x13 * x49) + (0x2 * ((uint64_t) x15 * x47) + (0x2 * ((uint64_t) x17 * x45) + (0x2 * ((uint64_t) x19 * x43) + (0x2 * ((uint64_t) x21 * x41) + (0x2 * ((uint64_t) x23 * x39) + (0x2 * ((uint64_t) x25 * x37) + (uint64_t) x27 * x35)))))))))) + 0x1f * ((uint64_t) x29 * x62 + ((uint64_t) x31 * x63 + ((uint64_t) x33 * x61 + (uint64_t) x32 * x59)));
- uint64_t x69 = (uint64_t) x5 * x55 + (0x2 * ((uint64_t) x7 * x53) + (0x2 * ((uint64_t) x9 * x51) + (0x2 * ((uint64_t) x11 * x49) + (0x2 * ((uint64_t) x13 * x47) + (0x2 * ((uint64_t) x15 * x45) + (0x2 * ((uint64_t) x17 * x43) + (0x2 * ((uint64_t) x19 * x41) + (0x2 * ((uint64_t) x21 * x39) + (0x2 * ((uint64_t) x23 * x37) + (uint64_t) x25 * x35))))))))) + 0x1f * ((uint64_t) x27 * x62 + ((uint64_t) x29 * x63 + ((uint64_t) x31 * x61 + ((uint64_t) x33 * x59 + (uint64_t) x32 * x57))));
- uint64_t x70 = (uint64_t) x5 * x53 + (0x2 * ((uint64_t) x7 * x51) + (0x2 * ((uint64_t) x9 * x49) + (0x2 * ((uint64_t) x11 * x47) + (0x2 * ((uint64_t) x13 * x45) + (0x2 * ((uint64_t) x15 * x43) + (0x2 * ((uint64_t) x17 * x41) + (0x2 * ((uint64_t) x19 * x39) + (0x2 * ((uint64_t) x21 * x37) + (uint64_t) x23 * x35)))))))) + 0x1f * ((uint64_t) x25 * x62 + ((uint64_t) x27 * x63 + ((uint64_t) x29 * x61 + ((uint64_t) x31 * x59 + ((uint64_t) x33 * x57 + (uint64_t) x32 * x55)))));
- uint64_t x71 = (uint64_t) x5 * x51 + (0x2 * ((uint64_t) x7 * x49) + (0x2 * ((uint64_t) x9 * x47) + (0x2 * ((uint64_t) x11 * x45) + (0x2 * ((uint64_t) x13 * x43) + (0x2 * ((uint64_t) x15 * x41) + (0x2 * ((uint64_t) x17 * x39) + (0x2 * ((uint64_t) x19 * x37) + (uint64_t) x21 * x35))))))) + 0x1f * ((uint64_t) x23 * x62 + ((uint64_t) x25 * x63 + ((uint64_t) x27 * x61 + ((uint64_t) x29 * x59 + ((uint64_t) x31 * x57 + ((uint64_t) x33 * x55 + (uint64_t) x32 * x53))))));
- uint64_t x72 = (uint64_t) x5 * x49 + (0x2 * ((uint64_t) x7 * x47) + (0x2 * ((uint64_t) x9 * x45) + (0x2 * ((uint64_t) x11 * x43) + (0x2 * ((uint64_t) x13 * x41) + (0x2 * ((uint64_t) x15 * x39) + (0x2 * ((uint64_t) x17 * x37) + (uint64_t) x19 * x35)))))) + 0x1f * ((uint64_t) x21 * x62 + ((uint64_t) x23 * x63 + ((uint64_t) x25 * x61 + ((uint64_t) x27 * x59 + ((uint64_t) x29 * x57 + ((uint64_t) x31 * x55 + ((uint64_t) x33 * x53 + (uint64_t) x32 * x51)))))));
- uint64_t x73 = (uint64_t) x5 * x47 + (0x2 * ((uint64_t) x7 * x45) + (0x2 * ((uint64_t) x9 * x43) + (0x2 * ((uint64_t) x11 * x41) + (0x2 * ((uint64_t) x13 * x39) + (0x2 * ((uint64_t) x15 * x37) + (uint64_t) x17 * x35))))) + 0x1f * ((uint64_t) x19 * x62 + ((uint64_t) x21 * x63 + ((uint64_t) x23 * x61 + ((uint64_t) x25 * x59 + ((uint64_t) x27 * x57 + ((uint64_t) x29 * x55 + ((uint64_t) x31 * x53 + ((uint64_t) x33 * x51 + (uint64_t) x32 * x49))))))));
- uint64_t x74 = (uint64_t) x5 * x45 + (0x2 * ((uint64_t) x7 * x43) + (0x2 * ((uint64_t) x9 * x41) + (0x2 * ((uint64_t) x11 * x39) + (0x2 * ((uint64_t) x13 * x37) + (uint64_t) x15 * x35)))) + 0x1f * ((uint64_t) x17 * x62 + ((uint64_t) x19 * x63 + ((uint64_t) x21 * x61 + ((uint64_t) x23 * x59 + ((uint64_t) x25 * x57 + ((uint64_t) x27 * x55 + ((uint64_t) x29 * x53 + ((uint64_t) x31 * x51 + ((uint64_t) x33 * x49 + (uint64_t) x32 * x47)))))))));
- uint64_t x75 = (uint64_t) x5 * x43 + (0x2 * ((uint64_t) x7 * x41) + (0x2 * ((uint64_t) x9 * x39) + (0x2 * ((uint64_t) x11 * x37) + (uint64_t) x13 * x35))) + 0x1f * ((uint64_t) x15 * x62 + ((uint64_t) x17 * x63 + ((uint64_t) x19 * x61 + ((uint64_t) x21 * x59 + ((uint64_t) x23 * x57 + ((uint64_t) x25 * x55 + ((uint64_t) x27 * x53 + ((uint64_t) x29 * x51 + ((uint64_t) x31 * x49 + ((uint64_t) x33 * x47 + (uint64_t) x32 * x45))))))))));
- uint64_t x76 = (uint64_t) x5 * x41 + (0x2 * ((uint64_t) x7 * x39) + (0x2 * ((uint64_t) x9 * x37) + (uint64_t) x11 * x35)) + 0x1f * ((uint64_t) x13 * x62 + ((uint64_t) x15 * x63 + ((uint64_t) x17 * x61 + ((uint64_t) x19 * x59 + ((uint64_t) x21 * x57 + ((uint64_t) x23 * x55 + ((uint64_t) x25 * x53 + ((uint64_t) x27 * x51 + ((uint64_t) x29 * x49 + ((uint64_t) x31 * x47 + ((uint64_t) x33 * x45 + (uint64_t) x32 * x43)))))))))));
- uint64_t x77 = (uint64_t) x5 * x39 + (0x2 * ((uint64_t) x7 * x37) + (uint64_t) x9 * x35) + 0x1f * ((uint64_t) x11 * x62 + ((uint64_t) x13 * x63 + ((uint64_t) x15 * x61 + ((uint64_t) x17 * x59 + ((uint64_t) x19 * x57 + ((uint64_t) x21 * x55 + ((uint64_t) x23 * x53 + ((uint64_t) x25 * x51 + ((uint64_t) x27 * x49 + ((uint64_t) x29 * x47 + ((uint64_t) x31 * x45 + ((uint64_t) x33 * x43 + (uint64_t) x32 * x41))))))))))));
- uint64_t x78 = (uint64_t) x5 * x37 + (uint64_t) x7 * x35 + 0x1f * ((uint64_t) x9 * x62 + ((uint64_t) x11 * x63 + ((uint64_t) x13 * x61 + ((uint64_t) x15 * x59 + ((uint64_t) x17 * x57 + ((uint64_t) x19 * x55 + ((uint64_t) x21 * x53 + ((uint64_t) x23 * x51 + ((uint64_t) x25 * x49 + ((uint64_t) x27 * x47 + ((uint64_t) x29 * x45 + ((uint64_t) x31 * x43 + ((uint64_t) x33 * x41 + (uint64_t) x32 * x39)))))))))))));
- uint64_t x79 = (uint64_t) x5 * x35 + 0x1f * (0x2 * ((uint64_t) x7 * x62) + (0x2 * ((uint64_t) x9 * x63) + (0x2 * ((uint64_t) x11 * x61) + (0x2 * ((uint64_t) x13 * x59) + (0x2 * ((uint64_t) x15 * x57) + (0x2 * ((uint64_t) x17 * x55) + (0x2 * ((uint64_t) x19 * x53) + (0x2 * ((uint64_t) x21 * x51) + (0x2 * ((uint64_t) x23 * x49) + (0x2 * ((uint64_t) x25 * x47) + (0x2 * ((uint64_t) x27 * x45) + (0x2 * ((uint64_t) x29 * x43) + (0x2 * ((uint64_t) x31 * x41) + (0x2 * ((uint64_t) x33 * x39) + 0x2 * ((uint64_t) x32 * x37)))))))))))))));
- uint64_t x80 = x79 >> 0x1a;
- uint32_t x81 = (uint32_t) x79 & 0x3ffffff;
- uint64_t x82 = x80 + x78;
- uint64_t x83 = x82 >> 0x19;
- uint32_t x84 = (uint32_t) x82 & 0x1ffffff;
- uint64_t x85 = x83 + x77;
- uint64_t x86 = x85 >> 0x19;
- uint32_t x87 = (uint32_t) x85 & 0x1ffffff;
- uint64_t x88 = x86 + x76;
- uint64_t x89 = x88 >> 0x19;
- uint32_t x90 = (uint32_t) x88 & 0x1ffffff;
- uint64_t x91 = x89 + x75;
- uint64_t x92 = x91 >> 0x19;
- uint32_t x93 = (uint32_t) x91 & 0x1ffffff;
- uint64_t x94 = x92 + x74;
- uint64_t x95 = x94 >> 0x19;
- uint32_t x96 = (uint32_t) x94 & 0x1ffffff;
- uint64_t x97 = x95 + x73;
- uint64_t x98 = x97 >> 0x19;
- uint32_t x99 = (uint32_t) x97 & 0x1ffffff;
- uint64_t x100 = x98 + x72;
- uint64_t x101 = x100 >> 0x19;
- uint32_t x102 = (uint32_t) x100 & 0x1ffffff;
- uint64_t x103 = x101 + x71;
- uint64_t x104 = x103 >> 0x19;
- uint32_t x105 = (uint32_t) x103 & 0x1ffffff;
- uint64_t x106 = x104 + x70;
- uint64_t x107 = x106 >> 0x19;
- uint32_t x108 = (uint32_t) x106 & 0x1ffffff;
- uint64_t x109 = x107 + x69;
- uint64_t x110 = x109 >> 0x19;
- uint32_t x111 = (uint32_t) x109 & 0x1ffffff;
- uint64_t x112 = x110 + x68;
- uint64_t x113 = x112 >> 0x19;
- uint32_t x114 = (uint32_t) x112 & 0x1ffffff;
- uint64_t x115 = x113 + x67;
- uint64_t x116 = x115 >> 0x19;
- uint32_t x117 = (uint32_t) x115 & 0x1ffffff;
- uint64_t x118 = x116 + x66;
- uint32_t x119 = (uint32_t) (x118 >> 0x19);
- uint32_t x120 = (uint32_t) x118 & 0x1ffffff;
- uint64_t x121 = x119 + x65;
- uint32_t x122 = (uint32_t) (x121 >> 0x19);
- uint32_t x123 = (uint32_t) x121 & 0x1ffffff;
- uint64_t x124 = x122 + x64;
- uint32_t x125 = (uint32_t) (x124 >> 0x19);
- uint32_t x126 = (uint32_t) x124 & 0x1ffffff;
- uint64_t x127 = x81 + (uint64_t) 0x1f * x125;
+ uint64_t x64 = (((uint64_t)x5 * x62) + ((0x2 * ((uint64_t)x7 * x63)) + ((0x2 * ((uint64_t)x9 * x61)) + ((0x2 * ((uint64_t)x11 * x59)) + ((0x2 * ((uint64_t)x13 * x57)) + ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + ((0x2 * ((uint64_t)x19 * x51)) + ((0x2 * ((uint64_t)x21 * x49)) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + ((0x2 * ((uint64_t)x27 * x43)) + ((0x2 * ((uint64_t)x29 * x41)) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+ uint64_t x65 = ((((uint64_t)x5 * x63) + ((0x2 * ((uint64_t)x7 * x61)) + ((0x2 * ((uint64_t)x9 * x59)) + ((0x2 * ((uint64_t)x11 * x57)) + ((0x2 * ((uint64_t)x13 * x55)) + ((0x2 * ((uint64_t)x15 * x53)) + ((0x2 * ((uint64_t)x17 * x51)) + ((0x2 * ((uint64_t)x19 * x49)) + ((0x2 * ((uint64_t)x21 * x47)) + ((0x2 * ((uint64_t)x23 * x45)) + ((0x2 * ((uint64_t)x25 * x43)) + ((0x2 * ((uint64_t)x27 * x41)) + ((0x2 * ((uint64_t)x29 * x39)) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) + (0x1f * ((uint64_t)x32 * x62)));
+ uint64_t x66 = ((((uint64_t)x5 * x61) + ((0x2 * ((uint64_t)x7 * x59)) + ((0x2 * ((uint64_t)x9 * x57)) + ((0x2 * ((uint64_t)x11 * x55)) + ((0x2 * ((uint64_t)x13 * x53)) + ((0x2 * ((uint64_t)x15 * x51)) + ((0x2 * ((uint64_t)x17 * x49)) + ((0x2 * ((uint64_t)x19 * x47)) + ((0x2 * ((uint64_t)x21 * x45)) + ((0x2 * ((uint64_t)x23 * x43)) + ((0x2 * ((uint64_t)x25 * x41)) + ((0x2 * ((uint64_t)x27 * x39)) + ((0x2 * ((uint64_t)x29 * x37)) + ((uint64_t)x31 * x35)))))))))))))) + (0x1f * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+ uint64_t x67 = ((((uint64_t)x5 * x59) + ((0x2 * ((uint64_t)x7 * x57)) + ((0x2 * ((uint64_t)x9 * x55)) + ((0x2 * ((uint64_t)x11 * x53)) + ((0x2 * ((uint64_t)x13 * x51)) + ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + ((0x2 * ((uint64_t)x21 * x43)) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) + (0x1f * (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))));
+ uint64_t x68 = ((((uint64_t)x5 * x57) + ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + ((0x2 * ((uint64_t)x11 * x51)) + ((0x2 * ((uint64_t)x13 * x49)) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + ((0x2 * ((uint64_t)x19 * x43)) + ((0x2 * ((uint64_t)x21 * x41)) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) + (0x1f * (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))));
+ uint64_t x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + ((0x2 * ((uint64_t)x9 * x51)) + ((0x2 * ((uint64_t)x11 * x49)) + ((0x2 * ((uint64_t)x13 * x47)) + ((0x2 * ((uint64_t)x15 * x45)) + ((0x2 * ((uint64_t)x17 * x43)) + ((0x2 * ((uint64_t)x19 * x41)) + ((0x2 * ((uint64_t)x21 * x39)) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) + (0x1f * (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+ uint64_t x70 = ((((uint64_t)x5 * x53) + ((0x2 * ((uint64_t)x7 * x51)) + ((0x2 * ((uint64_t)x9 * x49)) + ((0x2 * ((uint64_t)x11 * x47)) + ((0x2 * ((uint64_t)x13 * x45)) + ((0x2 * ((uint64_t)x15 * x43)) + ((0x2 * ((uint64_t)x17 * x41)) + ((0x2 * ((uint64_t)x19 * x39)) + ((0x2 * ((uint64_t)x21 * x37)) + ((uint64_t)x23 * x35)))))))))) + (0x1f * (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ uint64_t x71 = ((((uint64_t)x5 * x51) + ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + ((0x2 * ((uint64_t)x13 * x43)) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) + (0x1f * (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+ uint64_t x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + ((0x2 * ((uint64_t)x11 * x43)) + ((0x2 * ((uint64_t)x13 * x41)) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) + (0x1f * (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+ uint64_t x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + ((0x2 * ((uint64_t)x9 * x43)) + ((0x2 * ((uint64_t)x11 * x41)) + ((0x2 * ((uint64_t)x13 * x39)) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) + (0x1f * (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+ uint64_t x74 = ((((uint64_t)x5 * x45) + ((0x2 * ((uint64_t)x7 * x43)) + ((0x2 * ((uint64_t)x9 * x41)) + ((0x2 * ((uint64_t)x11 * x39)) + ((0x2 * ((uint64_t)x13 * x37)) + ((uint64_t)x15 * x35)))))) + (0x1f * (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+ uint64_t x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) + (0x1f * (((uint64_t)x15 * x62) + (((uint64_t)x17 * x63) + (((uint64_t)x19 * x61) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + (((uint64_t)x29 * x51) + (((uint64_t)x31 * x49) + (((uint64_t)x33 * x47) + ((uint64_t)x32 * x45)))))))))))));
+ uint64_t x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) + (0x1f * (((uint64_t)x13 * x62) + (((uint64_t)x15 * x63) + (((uint64_t)x17 * x61) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + (((uint64_t)x31 * x47) + (((uint64_t)x33 * x45) + ((uint64_t)x32 * x43))))))))))))));
+ uint64_t x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) + (0x1f * (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + (((uint64_t)x15 * x61) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + (((uint64_t)x31 * x45) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+ uint64_t x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (0x1f * (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+ uint64_t x79 = (((uint64_t)x5 * x35) + (0x1f * ((0x2 * ((uint64_t)x7 * x62)) + ((0x2 * ((uint64_t)x9 * x63)) + ((0x2 * ((uint64_t)x11 * x61)) + ((0x2 * ((uint64_t)x13 * x59)) + ((0x2 * ((uint64_t)x15 * x57)) + ((0x2 * ((uint64_t)x17 * x55)) + ((0x2 * ((uint64_t)x19 * x53)) + ((0x2 * ((uint64_t)x21 * x51)) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + ((0x2 * ((uint64_t)x29 * x43)) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+ uint64_t x80 = (x79 >> 0x1a);
+ uint32_t x81 = ((uint32_t)x79 & 0x3ffffff);
+ uint64_t x82 = (x80 + x78);
+ uint64_t x83 = (x82 >> 0x19);
+ uint32_t x84 = ((uint32_t)x82 & 0x1ffffff);
+ uint64_t x85 = (x83 + x77);
+ uint64_t x86 = (x85 >> 0x19);
+ uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
+ uint64_t x88 = (x86 + x76);
+ uint64_t x89 = (x88 >> 0x19);
+ uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
+ uint64_t x91 = (x89 + x75);
+ uint64_t x92 = (x91 >> 0x19);
+ uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
+ uint64_t x94 = (x92 + x74);
+ uint64_t x95 = (x94 >> 0x19);
+ uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
+ uint64_t x97 = (x95 + x73);
+ uint64_t x98 = (x97 >> 0x19);
+ uint32_t x99 = ((uint32_t)x97 & 0x1ffffff);
+ uint64_t x100 = (x98 + x72);
+ uint64_t x101 = (x100 >> 0x19);
+ uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
+ uint64_t x103 = (x101 + x71);
+ uint64_t x104 = (x103 >> 0x19);
+ uint32_t x105 = ((uint32_t)x103 & 0x1ffffff);
+ uint64_t x106 = (x104 + x70);
+ uint64_t x107 = (x106 >> 0x19);
+ uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
+ uint64_t x109 = (x107 + x69);
+ uint64_t x110 = (x109 >> 0x19);
+ uint32_t x111 = ((uint32_t)x109 & 0x1ffffff);
+ uint64_t x112 = (x110 + x68);
+ uint64_t x113 = (x112 >> 0x19);
+ uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
+ uint64_t x115 = (x113 + x67);
+ uint64_t x116 = (x115 >> 0x19);
+ uint32_t x117 = ((uint32_t)x115 & 0x1ffffff);
+ uint64_t x118 = (x116 + x66);
+ uint64_t x119 = (x118 >> 0x19);
+ uint32_t x120 = ((uint32_t)x118 & 0x1ffffff);
+ uint64_t x121 = (x119 + x65);
+ uint64_t x122 = (x121 >> 0x19);
+ uint32_t x123 = ((uint32_t)x121 & 0x1ffffff);
+ uint64_t x124 = (x122 + x64);
+ uint64_t x125 = (x124 >> 0x19);
+ uint32_t x126 = ((uint32_t)x124 & 0x1ffffff);
+ uint64_t x127 = (x81 + (0x1f * x125));
uint32_t x128 = (uint32_t) (x127 >> 0x1a);
- uint32_t x129 = (uint32_t) x127 & 0x3ffffff;
- uint32_t x130 = x128 + x84;
- uint32_t x131 = x130 >> 0x19;
- uint32_t x132 = x130 & 0x1ffffff;
- return (Return x126, Return x123, Return x120, Return x117, Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, x131 + x87, Return x132, Return x129))
+ uint32_t x129 = ((uint32_t)x127 & 0x3ffffff);
+ uint32_t x130 = (x128 + x84);
+ uint32_t x131 = (x130 >> 0x19);
+ uint32_t x132 = (x130 & 0x1ffffff);
+ return (Return x126, Return x123, Return x120, Return x117, Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, (x131 + x87), Return x132, Return x129))
(x, x0)%core
: word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e401m31/fesquare.c b/src/Specific/solinas32_2e401m31/fesquare.c
new file mode 100644
index 000000000..5379438ee
--- /dev/null
+++ b/src/Specific/solinas32_2e401m31/fesquare.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
+{ uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x1f * ((uint64_t)x29 * x29)));
+{ uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x1f * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+{ uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x1f * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
+{ uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x1f * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
+{ uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x1f * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+{ uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x1f * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+{ uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x1f * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+{ uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x1f * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+{ uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x1f * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+{ uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x1f * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+{ uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x1f * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
+{ uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x1f * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
+{ uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x1f * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+{ uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1f * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+{ uint64_t x46 = (((uint64_t)x2 * x2) + (0x1f * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
+{ uint64_t x47 = (x46 >> 0x1a);
+{ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+{ uint64_t x49 = (x47 + x45);
+{ uint64_t x50 = (x49 >> 0x19);
+{ uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
+{ uint64_t x52 = (x50 + x44);
+{ uint64_t x53 = (x52 >> 0x19);
+{ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+{ uint64_t x55 = (x53 + x43);
+{ uint64_t x56 = (x55 >> 0x19);
+{ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+{ uint64_t x58 = (x56 + x42);
+{ uint64_t x59 = (x58 >> 0x19);
+{ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+{ uint64_t x61 = (x59 + x41);
+{ uint64_t x62 = (x61 >> 0x19);
+{ uint32_t x63 = ((uint32_t)x61 & 0x1ffffff);
+{ uint64_t x64 = (x62 + x40);
+{ uint64_t x65 = (x64 >> 0x19);
+{ uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
+{ uint64_t x67 = (x65 + x39);
+{ uint64_t x68 = (x67 >> 0x19);
+{ uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
+{ uint64_t x70 = (x68 + x38);
+{ uint64_t x71 = (x70 >> 0x19);
+{ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+{ uint64_t x73 = (x71 + x37);
+{ uint64_t x74 = (x73 >> 0x19);
+{ uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
+{ uint64_t x76 = (x74 + x36);
+{ uint64_t x77 = (x76 >> 0x19);
+{ uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
+{ uint64_t x79 = (x77 + x35);
+{ uint64_t x80 = (x79 >> 0x19);
+{ uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
+{ uint64_t x82 = (x80 + x34);
+{ uint64_t x83 = (x82 >> 0x19);
+{ uint32_t x84 = ((uint32_t)x82 & 0x1ffffff);
+{ uint64_t x85 = (x83 + x33);
+{ uint64_t x86 = (x85 >> 0x19);
+{ uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
+{ uint64_t x88 = (x86 + x32);
+{ uint64_t x89 = (x88 >> 0x19);
+{ uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
+{ uint64_t x91 = (x89 + x31);
+{ uint64_t x92 = (x91 >> 0x19);
+{ uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
+{ uint64_t x94 = (x48 + (0x1f * x92));
+{ uint32_t x95 = (uint32_t) (x94 >> 0x1a);
+{ uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
+{ uint32_t x97 = (x95 + x51);
+{ uint32_t x98 = (x97 >> 0x19);
+{ uint32_t x99 = (x97 & 0x1ffffff);
+out[0] = x93;
+out[1] = x90;
+out[2] = x87;
+out[3] = x84;
+out[4] = x81;
+out[5] = x78;
+out[6] = x75;
+out[7] = x72;
+out[8] = x69;
+out[9] = x66;
+out[10] = x63;
+out[11] = x60;
+out[12] = x57;
+out[13] = x98 + x54;
+out[14] = x99;
+out[15] = x96;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e401m31/fesquare.h b/src/Specific/solinas32_2e401m31/fesquare.h
new file mode 100644
index 000000000..c86247b3d
--- /dev/null
+++ b/src/Specific/solinas32_2e401m31/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e401m31/fesquareDisplay.log b/src/Specific/solinas32_2e401m31/fesquareDisplay.log
index c6c5e8f93..c1666a6e1 100644
--- a/src/Specific/solinas32_2e401m31/fesquareDisplay.log
+++ b/src/Specific/solinas32_2e401m31/fesquareDisplay.log
@@ -2,75 +2,75 @@
Interp-η
(λ var : Syntax.base_type → Type,
λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
- uint64_t x31 = (uint64_t) x2 * x29 + (0x2 * ((uint64_t) x4 * x30) + (0x2 * ((uint64_t) x6 * x28) + (0x2 * ((uint64_t) x8 * x26) + (0x2 * ((uint64_t) x10 * x24) + (0x2 * ((uint64_t) x12 * x22) + (0x2 * ((uint64_t) x14 * x20) + (0x2 * ((uint64_t) x16 * x18) + (0x2 * ((uint64_t) x18 * x16) + (0x2 * ((uint64_t) x20 * x14) + (0x2 * ((uint64_t) x22 * x12) + (0x2 * ((uint64_t) x24 * x10) + (0x2 * ((uint64_t) x26 * x8) + (0x2 * ((uint64_t) x28 * x6) + (0x2 * ((uint64_t) x30 * x4) + (uint64_t) x29 * x2))))))))))))));
- uint64_t x32 = (uint64_t) x2 * x30 + (0x2 * ((uint64_t) x4 * x28) + (0x2 * ((uint64_t) x6 * x26) + (0x2 * ((uint64_t) x8 * x24) + (0x2 * ((uint64_t) x10 * x22) + (0x2 * ((uint64_t) x12 * x20) + (0x2 * ((uint64_t) x14 * x18) + (0x2 * ((uint64_t) x16 * x16) + (0x2 * ((uint64_t) x18 * x14) + (0x2 * ((uint64_t) x20 * x12) + (0x2 * ((uint64_t) x22 * x10) + (0x2 * ((uint64_t) x24 * x8) + (0x2 * ((uint64_t) x26 * x6) + (0x2 * ((uint64_t) x28 * x4) + (uint64_t) x30 * x2))))))))))))) + 0x1f * ((uint64_t) x29 * x29);
- uint64_t x33 = (uint64_t) x2 * x28 + (0x2 * ((uint64_t) x4 * x26) + (0x2 * ((uint64_t) x6 * x24) + (0x2 * ((uint64_t) x8 * x22) + (0x2 * ((uint64_t) x10 * x20) + (0x2 * ((uint64_t) x12 * x18) + (0x2 * ((uint64_t) x14 * x16) + (0x2 * ((uint64_t) x16 * x14) + (0x2 * ((uint64_t) x18 * x12) + (0x2 * ((uint64_t) x20 * x10) + (0x2 * ((uint64_t) x22 * x8) + (0x2 * ((uint64_t) x24 * x6) + (0x2 * ((uint64_t) x26 * x4) + (uint64_t) x28 * x2)))))))))))) + 0x1f * ((uint64_t) x30 * x29 + (uint64_t) x29 * x30);
- uint64_t x34 = (uint64_t) x2 * x26 + (0x2 * ((uint64_t) x4 * x24) + (0x2 * ((uint64_t) x6 * x22) + (0x2 * ((uint64_t) x8 * x20) + (0x2 * ((uint64_t) x10 * x18) + (0x2 * ((uint64_t) x12 * x16) + (0x2 * ((uint64_t) x14 * x14) + (0x2 * ((uint64_t) x16 * x12) + (0x2 * ((uint64_t) x18 * x10) + (0x2 * ((uint64_t) x20 * x8) + (0x2 * ((uint64_t) x22 * x6) + (0x2 * ((uint64_t) x24 * x4) + (uint64_t) x26 * x2))))))))))) + 0x1f * ((uint64_t) x28 * x29 + ((uint64_t) x30 * x30 + (uint64_t) x29 * x28));
- uint64_t x35 = (uint64_t) x2 * x24 + (0x2 * ((uint64_t) x4 * x22) + (0x2 * ((uint64_t) x6 * x20) + (0x2 * ((uint64_t) x8 * x18) + (0x2 * ((uint64_t) x10 * x16) + (0x2 * ((uint64_t) x12 * x14) + (0x2 * ((uint64_t) x14 * x12) + (0x2 * ((uint64_t) x16 * x10) + (0x2 * ((uint64_t) x18 * x8) + (0x2 * ((uint64_t) x20 * x6) + (0x2 * ((uint64_t) x22 * x4) + (uint64_t) x24 * x2)))))))))) + 0x1f * ((uint64_t) x26 * x29 + ((uint64_t) x28 * x30 + ((uint64_t) x30 * x28 + (uint64_t) x29 * x26)));
- uint64_t x36 = (uint64_t) x2 * x22 + (0x2 * ((uint64_t) x4 * x20) + (0x2 * ((uint64_t) x6 * x18) + (0x2 * ((uint64_t) x8 * x16) + (0x2 * ((uint64_t) x10 * x14) + (0x2 * ((uint64_t) x12 * x12) + (0x2 * ((uint64_t) x14 * x10) + (0x2 * ((uint64_t) x16 * x8) + (0x2 * ((uint64_t) x18 * x6) + (0x2 * ((uint64_t) x20 * x4) + (uint64_t) x22 * x2))))))))) + 0x1f * ((uint64_t) x24 * x29 + ((uint64_t) x26 * x30 + ((uint64_t) x28 * x28 + ((uint64_t) x30 * x26 + (uint64_t) x29 * x24))));
- uint64_t x37 = (uint64_t) x2 * x20 + (0x2 * ((uint64_t) x4 * x18) + (0x2 * ((uint64_t) x6 * x16) + (0x2 * ((uint64_t) x8 * x14) + (0x2 * ((uint64_t) x10 * x12) + (0x2 * ((uint64_t) x12 * x10) + (0x2 * ((uint64_t) x14 * x8) + (0x2 * ((uint64_t) x16 * x6) + (0x2 * ((uint64_t) x18 * x4) + (uint64_t) x20 * x2)))))))) + 0x1f * ((uint64_t) x22 * x29 + ((uint64_t) x24 * x30 + ((uint64_t) x26 * x28 + ((uint64_t) x28 * x26 + ((uint64_t) x30 * x24 + (uint64_t) x29 * x22)))));
- uint64_t x38 = (uint64_t) x2 * x18 + (0x2 * ((uint64_t) x4 * x16) + (0x2 * ((uint64_t) x6 * x14) + (0x2 * ((uint64_t) x8 * x12) + (0x2 * ((uint64_t) x10 * x10) + (0x2 * ((uint64_t) x12 * x8) + (0x2 * ((uint64_t) x14 * x6) + (0x2 * ((uint64_t) x16 * x4) + (uint64_t) x18 * x2))))))) + 0x1f * ((uint64_t) x20 * x29 + ((uint64_t) x22 * x30 + ((uint64_t) x24 * x28 + ((uint64_t) x26 * x26 + ((uint64_t) x28 * x24 + ((uint64_t) x30 * x22 + (uint64_t) x29 * x20))))));
- uint64_t x39 = (uint64_t) x2 * x16 + (0x2 * ((uint64_t) x4 * x14) + (0x2 * ((uint64_t) x6 * x12) + (0x2 * ((uint64_t) x8 * x10) + (0x2 * ((uint64_t) x10 * x8) + (0x2 * ((uint64_t) x12 * x6) + (0x2 * ((uint64_t) x14 * x4) + (uint64_t) x16 * x2)))))) + 0x1f * ((uint64_t) x18 * x29 + ((uint64_t) x20 * x30 + ((uint64_t) x22 * x28 + ((uint64_t) x24 * x26 + ((uint64_t) x26 * x24 + ((uint64_t) x28 * x22 + ((uint64_t) x30 * x20 + (uint64_t) x29 * x18)))))));
- uint64_t x40 = (uint64_t) x2 * x14 + (0x2 * ((uint64_t) x4 * x12) + (0x2 * ((uint64_t) x6 * x10) + (0x2 * ((uint64_t) x8 * x8) + (0x2 * ((uint64_t) x10 * x6) + (0x2 * ((uint64_t) x12 * x4) + (uint64_t) x14 * x2))))) + 0x1f * ((uint64_t) x16 * x29 + ((uint64_t) x18 * x30 + ((uint64_t) x20 * x28 + ((uint64_t) x22 * x26 + ((uint64_t) x24 * x24 + ((uint64_t) x26 * x22 + ((uint64_t) x28 * x20 + ((uint64_t) x30 * x18 + (uint64_t) x29 * x16))))))));
- uint64_t x41 = (uint64_t) x2 * x12 + (0x2 * ((uint64_t) x4 * x10) + (0x2 * ((uint64_t) x6 * x8) + (0x2 * ((uint64_t) x8 * x6) + (0x2 * ((uint64_t) x10 * x4) + (uint64_t) x12 * x2)))) + 0x1f * ((uint64_t) x14 * x29 + ((uint64_t) x16 * x30 + ((uint64_t) x18 * x28 + ((uint64_t) x20 * x26 + ((uint64_t) x22 * x24 + ((uint64_t) x24 * x22 + ((uint64_t) x26 * x20 + ((uint64_t) x28 * x18 + ((uint64_t) x30 * x16 + (uint64_t) x29 * x14)))))))));
- uint64_t x42 = (uint64_t) x2 * x10 + (0x2 * ((uint64_t) x4 * x8) + (0x2 * ((uint64_t) x6 * x6) + (0x2 * ((uint64_t) x8 * x4) + (uint64_t) x10 * x2))) + 0x1f * ((uint64_t) x12 * x29 + ((uint64_t) x14 * x30 + ((uint64_t) x16 * x28 + ((uint64_t) x18 * x26 + ((uint64_t) x20 * x24 + ((uint64_t) x22 * x22 + ((uint64_t) x24 * x20 + ((uint64_t) x26 * x18 + ((uint64_t) x28 * x16 + ((uint64_t) x30 * x14 + (uint64_t) x29 * x12))))))))));
- uint64_t x43 = (uint64_t) x2 * x8 + (0x2 * ((uint64_t) x4 * x6) + (0x2 * ((uint64_t) x6 * x4) + (uint64_t) x8 * x2)) + 0x1f * ((uint64_t) x10 * x29 + ((uint64_t) x12 * x30 + ((uint64_t) x14 * x28 + ((uint64_t) x16 * x26 + ((uint64_t) x18 * x24 + ((uint64_t) x20 * x22 + ((uint64_t) x22 * x20 + ((uint64_t) x24 * x18 + ((uint64_t) x26 * x16 + ((uint64_t) x28 * x14 + ((uint64_t) x30 * x12 + (uint64_t) x29 * x10)))))))))));
- uint64_t x44 = (uint64_t) x2 * x6 + (0x2 * ((uint64_t) x4 * x4) + (uint64_t) x6 * x2) + 0x1f * ((uint64_t) x8 * x29 + ((uint64_t) x10 * x30 + ((uint64_t) x12 * x28 + ((uint64_t) x14 * x26 + ((uint64_t) x16 * x24 + ((uint64_t) x18 * x22 + ((uint64_t) x20 * x20 + ((uint64_t) x22 * x18 + ((uint64_t) x24 * x16 + ((uint64_t) x26 * x14 + ((uint64_t) x28 * x12 + ((uint64_t) x30 * x10 + (uint64_t) x29 * x8))))))))))));
- uint64_t x45 = (uint64_t) x2 * x4 + (uint64_t) x4 * x2 + 0x1f * ((uint64_t) x6 * x29 + ((uint64_t) x8 * x30 + ((uint64_t) x10 * x28 + ((uint64_t) x12 * x26 + ((uint64_t) x14 * x24 + ((uint64_t) x16 * x22 + ((uint64_t) x18 * x20 + ((uint64_t) x20 * x18 + ((uint64_t) x22 * x16 + ((uint64_t) x24 * x14 + ((uint64_t) x26 * x12 + ((uint64_t) x28 * x10 + ((uint64_t) x30 * x8 + (uint64_t) x29 * x6)))))))))))));
- uint64_t x46 = (uint64_t) x2 * x2 + 0x1f * (0x2 * ((uint64_t) x4 * x29) + (0x2 * ((uint64_t) x6 * x30) + (0x2 * ((uint64_t) x8 * x28) + (0x2 * ((uint64_t) x10 * x26) + (0x2 * ((uint64_t) x12 * x24) + (0x2 * ((uint64_t) x14 * x22) + (0x2 * ((uint64_t) x16 * x20) + (0x2 * ((uint64_t) x18 * x18) + (0x2 * ((uint64_t) x20 * x16) + (0x2 * ((uint64_t) x22 * x14) + (0x2 * ((uint64_t) x24 * x12) + (0x2 * ((uint64_t) x26 * x10) + (0x2 * ((uint64_t) x28 * x8) + (0x2 * ((uint64_t) x30 * x6) + 0x2 * ((uint64_t) x29 * x4)))))))))))))));
- uint64_t x47 = x46 >> 0x1a;
- uint32_t x48 = (uint32_t) x46 & 0x3ffffff;
- uint64_t x49 = x47 + x45;
- uint64_t x50 = x49 >> 0x19;
- uint32_t x51 = (uint32_t) x49 & 0x1ffffff;
- uint64_t x52 = x50 + x44;
- uint64_t x53 = x52 >> 0x19;
- uint32_t x54 = (uint32_t) x52 & 0x1ffffff;
- uint64_t x55 = x53 + x43;
- uint64_t x56 = x55 >> 0x19;
- uint32_t x57 = (uint32_t) x55 & 0x1ffffff;
- uint64_t x58 = x56 + x42;
- uint64_t x59 = x58 >> 0x19;
- uint32_t x60 = (uint32_t) x58 & 0x1ffffff;
- uint64_t x61 = x59 + x41;
- uint64_t x62 = x61 >> 0x19;
- uint32_t x63 = (uint32_t) x61 & 0x1ffffff;
- uint64_t x64 = x62 + x40;
- uint64_t x65 = x64 >> 0x19;
- uint32_t x66 = (uint32_t) x64 & 0x1ffffff;
- uint64_t x67 = x65 + x39;
- uint64_t x68 = x67 >> 0x19;
- uint32_t x69 = (uint32_t) x67 & 0x1ffffff;
- uint64_t x70 = x68 + x38;
- uint64_t x71 = x70 >> 0x19;
- uint32_t x72 = (uint32_t) x70 & 0x1ffffff;
- uint64_t x73 = x71 + x37;
- uint64_t x74 = x73 >> 0x19;
- uint32_t x75 = (uint32_t) x73 & 0x1ffffff;
- uint64_t x76 = x74 + x36;
- uint64_t x77 = x76 >> 0x19;
- uint32_t x78 = (uint32_t) x76 & 0x1ffffff;
- uint64_t x79 = x77 + x35;
- uint64_t x80 = x79 >> 0x19;
- uint32_t x81 = (uint32_t) x79 & 0x1ffffff;
- uint64_t x82 = x80 + x34;
- uint64_t x83 = x82 >> 0x19;
- uint32_t x84 = (uint32_t) x82 & 0x1ffffff;
- uint64_t x85 = x83 + x33;
- uint32_t x86 = (uint32_t) (x85 >> 0x19);
- uint32_t x87 = (uint32_t) x85 & 0x1ffffff;
- uint64_t x88 = x86 + x32;
- uint32_t x89 = (uint32_t) (x88 >> 0x19);
- uint32_t x90 = (uint32_t) x88 & 0x1ffffff;
- uint64_t x91 = x89 + x31;
- uint32_t x92 = (uint32_t) (x91 >> 0x19);
- uint32_t x93 = (uint32_t) x91 & 0x1ffffff;
- uint64_t x94 = x48 + (uint64_t) 0x1f * x92;
+ uint64_t x31 = (((uint64_t)x2 * x29) + ((0x2 * ((uint64_t)x4 * x30)) + ((0x2 * ((uint64_t)x6 * x28)) + ((0x2 * ((uint64_t)x8 * x26)) + ((0x2 * ((uint64_t)x10 * x24)) + ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + ((0x2 * ((uint64_t)x16 * x18)) + ((0x2 * ((uint64_t)x18 * x16)) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + ((0x2 * ((uint64_t)x24 * x10)) + ((0x2 * ((uint64_t)x26 * x8)) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
+ uint64_t x32 = ((((uint64_t)x2 * x30) + ((0x2 * ((uint64_t)x4 * x28)) + ((0x2 * ((uint64_t)x6 * x26)) + ((0x2 * ((uint64_t)x8 * x24)) + ((0x2 * ((uint64_t)x10 * x22)) + ((0x2 * ((uint64_t)x12 * x20)) + ((0x2 * ((uint64_t)x14 * x18)) + ((0x2 * ((uint64_t)x16 * x16)) + ((0x2 * ((uint64_t)x18 * x14)) + ((0x2 * ((uint64_t)x20 * x12)) + ((0x2 * ((uint64_t)x22 * x10)) + ((0x2 * ((uint64_t)x24 * x8)) + ((0x2 * ((uint64_t)x26 * x6)) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) + (0x1f * ((uint64_t)x29 * x29)));
+ uint64_t x33 = ((((uint64_t)x2 * x28) + ((0x2 * ((uint64_t)x4 * x26)) + ((0x2 * ((uint64_t)x6 * x24)) + ((0x2 * ((uint64_t)x8 * x22)) + ((0x2 * ((uint64_t)x10 * x20)) + ((0x2 * ((uint64_t)x12 * x18)) + ((0x2 * ((uint64_t)x14 * x16)) + ((0x2 * ((uint64_t)x16 * x14)) + ((0x2 * ((uint64_t)x18 * x12)) + ((0x2 * ((uint64_t)x20 * x10)) + ((0x2 * ((uint64_t)x22 * x8)) + ((0x2 * ((uint64_t)x24 * x6)) + ((0x2 * ((uint64_t)x26 * x4)) + ((uint64_t)x28 * x2)))))))))))))) + (0x1f * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+ uint64_t x34 = ((((uint64_t)x2 * x26) + ((0x2 * ((uint64_t)x4 * x24)) + ((0x2 * ((uint64_t)x6 * x22)) + ((0x2 * ((uint64_t)x8 * x20)) + ((0x2 * ((uint64_t)x10 * x18)) + ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + ((0x2 * ((uint64_t)x18 * x10)) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) + (0x1f * (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))));
+ uint64_t x35 = ((((uint64_t)x2 * x24) + ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + ((0x2 * ((uint64_t)x8 * x18)) + ((0x2 * ((uint64_t)x10 * x16)) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + ((0x2 * ((uint64_t)x16 * x10)) + ((0x2 * ((uint64_t)x18 * x8)) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) + (0x1f * (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))));
+ uint64_t x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + ((0x2 * ((uint64_t)x6 * x18)) + ((0x2 * ((uint64_t)x8 * x16)) + ((0x2 * ((uint64_t)x10 * x14)) + ((0x2 * ((uint64_t)x12 * x12)) + ((0x2 * ((uint64_t)x14 * x10)) + ((0x2 * ((uint64_t)x16 * x8)) + ((0x2 * ((uint64_t)x18 * x6)) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) + (0x1f * (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+ uint64_t x37 = ((((uint64_t)x2 * x20) + ((0x2 * ((uint64_t)x4 * x18)) + ((0x2 * ((uint64_t)x6 * x16)) + ((0x2 * ((uint64_t)x8 * x14)) + ((0x2 * ((uint64_t)x10 * x12)) + ((0x2 * ((uint64_t)x12 * x10)) + ((0x2 * ((uint64_t)x14 * x8)) + ((0x2 * ((uint64_t)x16 * x6)) + ((0x2 * ((uint64_t)x18 * x4)) + ((uint64_t)x20 * x2)))))))))) + (0x1f * (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ uint64_t x38 = ((((uint64_t)x2 * x18) + ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + ((0x2 * ((uint64_t)x10 * x10)) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) + (0x1f * (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+ uint64_t x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + ((0x2 * ((uint64_t)x8 * x10)) + ((0x2 * ((uint64_t)x10 * x8)) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) + (0x1f * (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+ uint64_t x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + ((0x2 * ((uint64_t)x6 * x10)) + ((0x2 * ((uint64_t)x8 * x8)) + ((0x2 * ((uint64_t)x10 * x6)) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) + (0x1f * (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+ uint64_t x41 = ((((uint64_t)x2 * x12) + ((0x2 * ((uint64_t)x4 * x10)) + ((0x2 * ((uint64_t)x6 * x8)) + ((0x2 * ((uint64_t)x8 * x6)) + ((0x2 * ((uint64_t)x10 * x4)) + ((uint64_t)x12 * x2)))))) + (0x1f * (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+ uint64_t x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) + (0x1f * (((uint64_t)x12 * x29) + (((uint64_t)x14 * x30) + (((uint64_t)x16 * x28) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + (((uint64_t)x26 * x18) + (((uint64_t)x28 * x16) + (((uint64_t)x30 * x14) + ((uint64_t)x29 * x12)))))))))))));
+ uint64_t x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) + (0x1f * (((uint64_t)x10 * x29) + (((uint64_t)x12 * x30) + (((uint64_t)x14 * x28) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + (((uint64_t)x28 * x14) + (((uint64_t)x30 * x12) + ((uint64_t)x29 * x10))))))))))))));
+ uint64_t x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) + (0x1f * (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + (((uint64_t)x12 * x28) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + (((uint64_t)x28 * x12) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+ uint64_t x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (0x1f * (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+ uint64_t x46 = (((uint64_t)x2 * x2) + (0x1f * ((0x2 * ((uint64_t)x4 * x29)) + ((0x2 * ((uint64_t)x6 * x30)) + ((0x2 * ((uint64_t)x8 * x28)) + ((0x2 * ((uint64_t)x10 * x26)) + ((0x2 * ((uint64_t)x12 * x24)) + ((0x2 * ((uint64_t)x14 * x22)) + ((0x2 * ((uint64_t)x16 * x20)) + ((0x2 * ((uint64_t)x18 * x18)) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + ((0x2 * ((uint64_t)x26 * x10)) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
+ uint64_t x47 = (x46 >> 0x1a);
+ uint32_t x48 = ((uint32_t)x46 & 0x3ffffff);
+ uint64_t x49 = (x47 + x45);
+ uint64_t x50 = (x49 >> 0x19);
+ uint32_t x51 = ((uint32_t)x49 & 0x1ffffff);
+ uint64_t x52 = (x50 + x44);
+ uint64_t x53 = (x52 >> 0x19);
+ uint32_t x54 = ((uint32_t)x52 & 0x1ffffff);
+ uint64_t x55 = (x53 + x43);
+ uint64_t x56 = (x55 >> 0x19);
+ uint32_t x57 = ((uint32_t)x55 & 0x1ffffff);
+ uint64_t x58 = (x56 + x42);
+ uint64_t x59 = (x58 >> 0x19);
+ uint32_t x60 = ((uint32_t)x58 & 0x1ffffff);
+ uint64_t x61 = (x59 + x41);
+ uint64_t x62 = (x61 >> 0x19);
+ uint32_t x63 = ((uint32_t)x61 & 0x1ffffff);
+ uint64_t x64 = (x62 + x40);
+ uint64_t x65 = (x64 >> 0x19);
+ uint32_t x66 = ((uint32_t)x64 & 0x1ffffff);
+ uint64_t x67 = (x65 + x39);
+ uint64_t x68 = (x67 >> 0x19);
+ uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
+ uint64_t x70 = (x68 + x38);
+ uint64_t x71 = (x70 >> 0x19);
+ uint32_t x72 = ((uint32_t)x70 & 0x1ffffff);
+ uint64_t x73 = (x71 + x37);
+ uint64_t x74 = (x73 >> 0x19);
+ uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
+ uint64_t x76 = (x74 + x36);
+ uint64_t x77 = (x76 >> 0x19);
+ uint32_t x78 = ((uint32_t)x76 & 0x1ffffff);
+ uint64_t x79 = (x77 + x35);
+ uint64_t x80 = (x79 >> 0x19);
+ uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
+ uint64_t x82 = (x80 + x34);
+ uint64_t x83 = (x82 >> 0x19);
+ uint32_t x84 = ((uint32_t)x82 & 0x1ffffff);
+ uint64_t x85 = (x83 + x33);
+ uint64_t x86 = (x85 >> 0x19);
+ uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
+ uint64_t x88 = (x86 + x32);
+ uint64_t x89 = (x88 >> 0x19);
+ uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
+ uint64_t x91 = (x89 + x31);
+ uint64_t x92 = (x91 >> 0x19);
+ uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
+ uint64_t x94 = (x48 + (0x1f * x92));
uint32_t x95 = (uint32_t) (x94 >> 0x1a);
- uint32_t x96 = (uint32_t) x94 & 0x3ffffff;
- uint32_t x97 = x95 + x51;
- uint32_t x98 = x97 >> 0x19;
- uint32_t x99 = x97 & 0x1ffffff;
- return (Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, Return x72, Return x69, Return x66, Return x63, Return x60, Return x57, x98 + x54, Return x99, Return x96))
+ uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
+ uint32_t x97 = (x95 + x51);
+ uint32_t x98 = (x97 >> 0x19);
+ uint32_t x99 = (x97 & 0x1ffffff);
+ return (Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, Return x72, Return x69, Return x66, Return x63, Return x60, Return x57, (x98 + x54), Return x99, Return x96))
x
: word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e401m31/freeze.c b/src/Specific/solinas32_2e401m31/freeze.c
new file mode 100644
index 000000000..d3f0fe91e
--- /dev/null
+++ b/src/Specific/solinas32_2e401m31/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x32;
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x3ffffe1;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e401m31/freeze.h b/src/Specific/solinas32_2e401m31/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e401m31/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e401m31/freezeDisplay.log b/src/Specific/solinas32_2e401m31/freezeDisplay.log
index 26bac0787..c3bc3bec7 100644
--- a/src/Specific/solinas32_2e401m31/freezeDisplay.log
+++ b/src/Specific/solinas32_2e401m31/freezeDisplay.log
@@ -19,37 +19,37 @@ Interp-η
uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0x1ffffff);
uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0x1ffffff);
uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
- uint32_t x80 = x79 & 0x3ffffe1;
+ uint32_t x80 = (x79 & 0x3ffffe1);
uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
- uint32_t x84 = x79 & 0x1ffffff;
+ uint32_t x84 = (x79 & 0x1ffffff);
uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
- uint32_t x88 = x79 & 0x1ffffff;
+ uint32_t x88 = (x79 & 0x1ffffff);
uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
- uint32_t x92 = x79 & 0x1ffffff;
+ uint32_t x92 = (x79 & 0x1ffffff);
uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
- uint32_t x96 = x79 & 0x1ffffff;
+ uint32_t x96 = (x79 & 0x1ffffff);
uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
- uint32_t x100 = x79 & 0x1ffffff;
+ uint32_t x100 = (x79 & 0x1ffffff);
uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
- uint32_t x104 = x79 & 0x1ffffff;
+ uint32_t x104 = (x79 & 0x1ffffff);
uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
- uint32_t x108 = x79 & 0x1ffffff;
+ uint32_t x108 = (x79 & 0x1ffffff);
uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
- uint32_t x112 = x79 & 0x1ffffff;
+ uint32_t x112 = (x79 & 0x1ffffff);
uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
- uint32_t x116 = x79 & 0x1ffffff;
+ uint32_t x116 = (x79 & 0x1ffffff);
uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
- uint32_t x120 = x79 & 0x1ffffff;
+ uint32_t x120 = (x79 & 0x1ffffff);
uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
- uint32_t x124 = x79 & 0x1ffffff;
+ uint32_t x124 = (x79 & 0x1ffffff);
uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
- uint32_t x128 = x79 & 0x1ffffff;
+ uint32_t x128 = (x79 & 0x1ffffff);
uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
- uint32_t x132 = x79 & 0x1ffffff;
+ uint32_t x132 = (x79 & 0x1ffffff);
uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
- uint32_t x136 = x79 & 0x1ffffff;
+ uint32_t x136 = (x79 & 0x1ffffff);
uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
- uint32_t x140 = x79 & 0x1ffffff;
+ uint32_t x140 = (x79 & 0x1ffffff);
uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
(Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
x
diff --git a/src/Specific/solinas32_2e416m2e208m1/femul.c b/src/Specific/solinas32_2e416m2e208m1/femul.c
new file mode 100644
index 000000000..6a8378ba7
--- /dev/null
+++ b/src/Specific/solinas32_2e416m2e208m1/femul.c
@@ -0,0 +1,131 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
+{ uint64_t x64 = (((uint64_t)(x19 + x32) * (x49 + x62)) - ((uint64_t)x19 * x49));
+{ uint64_t x65 = ((((uint64_t)(x17 + x33) * (x49 + x62)) + ((uint64_t)(x19 + x32) * (x47 + x63))) - (((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)));
+{ uint64_t x66 = ((((uint64_t)(x15 + x31) * (x49 + x62)) + (((uint64_t)(x17 + x33) * (x47 + x63)) + ((uint64_t)(x19 + x32) * (x45 + x61)))) - (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))));
+{ uint64_t x67 = ((((uint64_t)(x13 + x29) * (x49 + x62)) + (((uint64_t)(x15 + x31) * (x47 + x63)) + (((uint64_t)(x17 + x33) * (x45 + x61)) + ((uint64_t)(x19 + x32) * (x43 + x59))))) - (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))));
+{ uint64_t x68 = ((((uint64_t)(x11 + x27) * (x49 + x62)) + (((uint64_t)(x13 + x29) * (x47 + x63)) + (((uint64_t)(x15 + x31) * (x45 + x61)) + (((uint64_t)(x17 + x33) * (x43 + x59)) + ((uint64_t)(x19 + x32) * (x41 + x57)))))) - (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))));
+{ uint64_t x69 = ((((uint64_t)(x9 + x25) * (x49 + x62)) + (((uint64_t)(x11 + x27) * (x47 + x63)) + (((uint64_t)(x13 + x29) * (x45 + x61)) + (((uint64_t)(x15 + x31) * (x43 + x59)) + (((uint64_t)(x17 + x33) * (x41 + x57)) + ((uint64_t)(x19 + x32) * (x39 + x55))))))) - (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))));
+{ uint64_t x70 = ((((uint64_t)(x7 + x23) * (x49 + x62)) + (((uint64_t)(x9 + x25) * (x47 + x63)) + (((uint64_t)(x11 + x27) * (x45 + x61)) + (((uint64_t)(x13 + x29) * (x43 + x59)) + (((uint64_t)(x15 + x31) * (x41 + x57)) + (((uint64_t)(x17 + x33) * (x39 + x55)) + ((uint64_t)(x19 + x32) * (x37 + x53)))))))) - (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))));
+{ uint64_t x71 = ((((uint64_t)(x5 + x21) * (x49 + x62)) + (((uint64_t)(x7 + x23) * (x47 + x63)) + (((uint64_t)(x9 + x25) * (x45 + x61)) + (((uint64_t)(x11 + x27) * (x43 + x59)) + (((uint64_t)(x13 + x29) * (x41 + x57)) + (((uint64_t)(x15 + x31) * (x39 + x55)) + (((uint64_t)(x17 + x33) * (x37 + x53)) + ((uint64_t)(x19 + x32) * (x35 + x51))))))))) - (((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))));
+{ uint64_t x72 = ((((uint64_t)(x5 + x21) * (x47 + x63)) + (((uint64_t)(x7 + x23) * (x45 + x61)) + (((uint64_t)(x9 + x25) * (x43 + x59)) + (((uint64_t)(x11 + x27) * (x41 + x57)) + (((uint64_t)(x13 + x29) * (x39 + x55)) + (((uint64_t)(x15 + x31) * (x37 + x53)) + ((uint64_t)(x17 + x33) * (x35 + x51)))))))) - (((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))));
+{ uint64_t x73 = ((((uint64_t)(x5 + x21) * (x45 + x61)) + (((uint64_t)(x7 + x23) * (x43 + x59)) + (((uint64_t)(x9 + x25) * (x41 + x57)) + (((uint64_t)(x11 + x27) * (x39 + x55)) + (((uint64_t)(x13 + x29) * (x37 + x53)) + ((uint64_t)(x15 + x31) * (x35 + x51))))))) - (((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))));
+{ uint64_t x74 = ((((uint64_t)(x5 + x21) * (x43 + x59)) + (((uint64_t)(x7 + x23) * (x41 + x57)) + (((uint64_t)(x9 + x25) * (x39 + x55)) + (((uint64_t)(x11 + x27) * (x37 + x53)) + ((uint64_t)(x13 + x29) * (x35 + x51)))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))));
+{ uint64_t x75 = ((((uint64_t)(x5 + x21) * (x41 + x57)) + (((uint64_t)(x7 + x23) * (x39 + x55)) + (((uint64_t)(x9 + x25) * (x37 + x53)) + ((uint64_t)(x11 + x27) * (x35 + x51))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))));
+{ uint64_t x76 = ((((uint64_t)(x5 + x21) * (x39 + x55)) + (((uint64_t)(x7 + x23) * (x37 + x53)) + ((uint64_t)(x9 + x25) * (x35 + x51)))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))));
+{ uint64_t x77 = ((((uint64_t)(x5 + x21) * (x37 + x53)) + ((uint64_t)(x7 + x23) * (x35 + x51))) - (((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)));
+{ uint64_t x78 = (((uint64_t)(x5 + x21) * (x35 + x51)) - ((uint64_t)x5 * x35));
+{ uint64_t x79 = (((((uint64_t)x19 * x49) + ((uint64_t)x32 * x62)) + x72) + x64);
+{ uint64_t x80 = ((((((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)) + (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))) + x73) + x65);
+{ uint64_t x81 = ((((((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))) + (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))) + x74) + x66);
+{ uint64_t x82 = ((((((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))) + (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))) + x75) + x67);
+{ uint64_t x83 = ((((((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))) + (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))) + x76) + x68);
+{ uint64_t x84 = ((((((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))) + x77) + x69);
+{ uint64_t x85 = ((((((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))) + x78) + x70);
+{ uint64_t x86 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))) + (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51)))))))));
+{ uint64_t x87 = (((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))) + x64);
+{ uint64_t x88 = (((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))) + x65);
+{ uint64_t x89 = (((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((uint64_t)x29 * x51)))))) + x66);
+{ uint64_t x90 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + ((uint64_t)x27 * x51))))) + x67);
+{ uint64_t x91 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + ((uint64_t)x25 * x51)))) + x68);
+{ uint64_t x92 = (((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (((uint64_t)x21 * x53) + ((uint64_t)x23 * x51))) + x69);
+{ uint64_t x93 = ((((uint64_t)x5 * x35) + ((uint64_t)x21 * x51)) + x70);
+{ uint64_t x94 = (x86 >> 0x1a);
+{ uint32_t x95 = ((uint32_t)x86 & 0x3ffffff);
+{ uint64_t x96 = (x71 >> 0x1a);
+{ uint32_t x97 = ((uint32_t)x71 & 0x3ffffff);
+{ uint64_t x98 = ((0x4000000 * x96) + x97);
+{ uint64_t x99 = (x98 >> 0x1a);
+{ uint32_t x100 = ((uint32_t)x98 & 0x3ffffff);
+{ uint64_t x101 = ((x94 + x85) + x99);
+{ uint64_t x102 = (x101 >> 0x1a);
+{ uint32_t x103 = ((uint32_t)x101 & 0x3ffffff);
+{ uint64_t x104 = (x93 + x99);
+{ uint64_t x105 = (x104 >> 0x1a);
+{ uint32_t x106 = ((uint32_t)x104 & 0x3ffffff);
+{ uint64_t x107 = (x102 + x84);
+{ uint64_t x108 = (x107 >> 0x1a);
+{ uint32_t x109 = ((uint32_t)x107 & 0x3ffffff);
+{ uint64_t x110 = (x105 + x92);
+{ uint64_t x111 = (x110 >> 0x1a);
+{ uint32_t x112 = ((uint32_t)x110 & 0x3ffffff);
+{ uint64_t x113 = (x108 + x83);
+{ uint64_t x114 = (x113 >> 0x1a);
+{ uint32_t x115 = ((uint32_t)x113 & 0x3ffffff);
+{ uint64_t x116 = (x111 + x91);
+{ uint64_t x117 = (x116 >> 0x1a);
+{ uint32_t x118 = ((uint32_t)x116 & 0x3ffffff);
+{ uint64_t x119 = (x114 + x82);
+{ uint64_t x120 = (x119 >> 0x1a);
+{ uint32_t x121 = ((uint32_t)x119 & 0x3ffffff);
+{ uint64_t x122 = (x117 + x90);
+{ uint64_t x123 = (x122 >> 0x1a);
+{ uint32_t x124 = ((uint32_t)x122 & 0x3ffffff);
+{ uint64_t x125 = (x120 + x81);
+{ uint64_t x126 = (x125 >> 0x1a);
+{ uint32_t x127 = ((uint32_t)x125 & 0x3ffffff);
+{ uint64_t x128 = (x123 + x89);
+{ uint64_t x129 = (x128 >> 0x1a);
+{ uint32_t x130 = ((uint32_t)x128 & 0x3ffffff);
+{ uint64_t x131 = (x126 + x80);
+{ uint64_t x132 = (x131 >> 0x1a);
+{ uint32_t x133 = ((uint32_t)x131 & 0x3ffffff);
+{ uint64_t x134 = (x129 + x88);
+{ uint64_t x135 = (x134 >> 0x1a);
+{ uint32_t x136 = ((uint32_t)x134 & 0x3ffffff);
+{ uint64_t x137 = (x132 + x79);
+{ uint64_t x138 = (x137 >> 0x1a);
+{ uint32_t x139 = ((uint32_t)x137 & 0x3ffffff);
+{ uint64_t x140 = (x135 + x87);
+{ uint64_t x141 = (x140 >> 0x1a);
+{ uint32_t x142 = ((uint32_t)x140 & 0x3ffffff);
+{ uint64_t x143 = (x138 + x100);
+{ uint32_t x144 = (uint32_t) (x143 >> 0x1a);
+{ uint32_t x145 = ((uint32_t)x143 & 0x3ffffff);
+{ uint64_t x146 = (x141 + x95);
+{ uint32_t x147 = (uint32_t) (x146 >> 0x1a);
+{ uint32_t x148 = ((uint32_t)x146 & 0x3ffffff);
+{ uint64_t x149 = (((uint64_t)0x4000000 * x144) + x145);
+{ uint32_t x150 = (uint32_t) (x149 >> 0x1a);
+{ uint32_t x151 = ((uint32_t)x149 & 0x3ffffff);
+{ uint32_t x152 = ((x147 + x103) + x150);
+{ uint32_t x153 = (x152 >> 0x1a);
+{ uint32_t x154 = (x152 & 0x3ffffff);
+{ uint32_t x155 = (x106 + x150);
+{ uint32_t x156 = (x155 >> 0x1a);
+{ uint32_t x157 = (x155 & 0x3ffffff);
+out[0] = x151;
+out[1] = x139;
+out[2] = x133;
+out[3] = x127;
+out[4] = x121;
+out[5] = x115;
+out[6] = x153 + x109;
+out[7] = x154;
+out[8] = x148;
+out[9] = x142;
+out[10] = x136;
+out[11] = x130;
+out[12] = x124;
+out[13] = x118;
+out[14] = x156 + x112;
+out[15] = x157;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e416m2e208m1/femul.h b/src/Specific/solinas32_2e416m2e208m1/femul.h
new file mode 100644
index 000000000..c4089fc7d
--- /dev/null
+++ b/src/Specific/solinas32_2e416m2e208m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35);
diff --git a/src/Specific/solinas32_2e416m2e208m1/femulDisplay.log b/src/Specific/solinas32_2e416m2e208m1/femulDisplay.log
new file mode 100644
index 000000000..a1a68ec37
--- /dev/null
+++ b/src/Specific/solinas32_2e416m2e208m1/femulDisplay.log
@@ -0,0 +1,101 @@
+λ x x0 : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
+ uint64_t x64 = (((uint64_t)(x19 + x32) * (x49 + x62)) - ((uint64_t)x19 * x49));
+ uint64_t x65 = ((((uint64_t)(x17 + x33) * (x49 + x62)) + ((uint64_t)(x19 + x32) * (x47 + x63))) - (((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)));
+ uint64_t x66 = ((((uint64_t)(x15 + x31) * (x49 + x62)) + (((uint64_t)(x17 + x33) * (x47 + x63)) + ((uint64_t)(x19 + x32) * (x45 + x61)))) - (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))));
+ uint64_t x67 = ((((uint64_t)(x13 + x29) * (x49 + x62)) + (((uint64_t)(x15 + x31) * (x47 + x63)) + (((uint64_t)(x17 + x33) * (x45 + x61)) + ((uint64_t)(x19 + x32) * (x43 + x59))))) - (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))));
+ uint64_t x68 = ((((uint64_t)(x11 + x27) * (x49 + x62)) + (((uint64_t)(x13 + x29) * (x47 + x63)) + (((uint64_t)(x15 + x31) * (x45 + x61)) + (((uint64_t)(x17 + x33) * (x43 + x59)) + ((uint64_t)(x19 + x32) * (x41 + x57)))))) - (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))));
+ uint64_t x69 = ((((uint64_t)(x9 + x25) * (x49 + x62)) + (((uint64_t)(x11 + x27) * (x47 + x63)) + (((uint64_t)(x13 + x29) * (x45 + x61)) + (((uint64_t)(x15 + x31) * (x43 + x59)) + (((uint64_t)(x17 + x33) * (x41 + x57)) + ((uint64_t)(x19 + x32) * (x39 + x55))))))) - (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))));
+ uint64_t x70 = ((((uint64_t)(x7 + x23) * (x49 + x62)) + (((uint64_t)(x9 + x25) * (x47 + x63)) + (((uint64_t)(x11 + x27) * (x45 + x61)) + (((uint64_t)(x13 + x29) * (x43 + x59)) + (((uint64_t)(x15 + x31) * (x41 + x57)) + (((uint64_t)(x17 + x33) * (x39 + x55)) + ((uint64_t)(x19 + x32) * (x37 + x53)))))))) - (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))));
+ uint64_t x71 = ((((uint64_t)(x5 + x21) * (x49 + x62)) + (((uint64_t)(x7 + x23) * (x47 + x63)) + (((uint64_t)(x9 + x25) * (x45 + x61)) + (((uint64_t)(x11 + x27) * (x43 + x59)) + (((uint64_t)(x13 + x29) * (x41 + x57)) + (((uint64_t)(x15 + x31) * (x39 + x55)) + (((uint64_t)(x17 + x33) * (x37 + x53)) + ((uint64_t)(x19 + x32) * (x35 + x51))))))))) - (((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))));
+ uint64_t x72 = ((((uint64_t)(x5 + x21) * (x47 + x63)) + (((uint64_t)(x7 + x23) * (x45 + x61)) + (((uint64_t)(x9 + x25) * (x43 + x59)) + (((uint64_t)(x11 + x27) * (x41 + x57)) + (((uint64_t)(x13 + x29) * (x39 + x55)) + (((uint64_t)(x15 + x31) * (x37 + x53)) + ((uint64_t)(x17 + x33) * (x35 + x51)))))))) - (((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))));
+ uint64_t x73 = ((((uint64_t)(x5 + x21) * (x45 + x61)) + (((uint64_t)(x7 + x23) * (x43 + x59)) + (((uint64_t)(x9 + x25) * (x41 + x57)) + (((uint64_t)(x11 + x27) * (x39 + x55)) + (((uint64_t)(x13 + x29) * (x37 + x53)) + ((uint64_t)(x15 + x31) * (x35 + x51))))))) - (((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))));
+ uint64_t x74 = ((((uint64_t)(x5 + x21) * (x43 + x59)) + (((uint64_t)(x7 + x23) * (x41 + x57)) + (((uint64_t)(x9 + x25) * (x39 + x55)) + (((uint64_t)(x11 + x27) * (x37 + x53)) + ((uint64_t)(x13 + x29) * (x35 + x51)))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))));
+ uint64_t x75 = ((((uint64_t)(x5 + x21) * (x41 + x57)) + (((uint64_t)(x7 + x23) * (x39 + x55)) + (((uint64_t)(x9 + x25) * (x37 + x53)) + ((uint64_t)(x11 + x27) * (x35 + x51))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))));
+ uint64_t x76 = ((((uint64_t)(x5 + x21) * (x39 + x55)) + (((uint64_t)(x7 + x23) * (x37 + x53)) + ((uint64_t)(x9 + x25) * (x35 + x51)))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))));
+ uint64_t x77 = ((((uint64_t)(x5 + x21) * (x37 + x53)) + ((uint64_t)(x7 + x23) * (x35 + x51))) - (((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)));
+ uint64_t x78 = (((uint64_t)(x5 + x21) * (x35 + x51)) - ((uint64_t)x5 * x35));
+ uint64_t x79 = (((((uint64_t)x19 * x49) + ((uint64_t)x32 * x62)) + x72) + x64);
+ uint64_t x80 = ((((((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)) + (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))) + x73) + x65);
+ uint64_t x81 = ((((((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))) + (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))) + x74) + x66);
+ uint64_t x82 = ((((((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))) + (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))) + x75) + x67);
+ uint64_t x83 = ((((((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))) + (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))) + x76) + x68);
+ uint64_t x84 = ((((((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))) + x77) + x69);
+ uint64_t x85 = ((((((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))) + x78) + x70);
+ uint64_t x86 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))) + (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51)))))))));
+ uint64_t x87 = (((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))) + x64);
+ uint64_t x88 = (((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))) + x65);
+ uint64_t x89 = (((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((uint64_t)x29 * x51)))))) + x66);
+ uint64_t x90 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + ((uint64_t)x27 * x51))))) + x67);
+ uint64_t x91 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + ((uint64_t)x25 * x51)))) + x68);
+ uint64_t x92 = (((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (((uint64_t)x21 * x53) + ((uint64_t)x23 * x51))) + x69);
+ uint64_t x93 = ((((uint64_t)x5 * x35) + ((uint64_t)x21 * x51)) + x70);
+ uint64_t x94 = (x86 >> 0x1a);
+ uint32_t x95 = ((uint32_t)x86 & 0x3ffffff);
+ uint64_t x96 = (x71 >> 0x1a);
+ uint32_t x97 = ((uint32_t)x71 & 0x3ffffff);
+ uint64_t x98 = ((0x4000000 * x96) + x97);
+ uint64_t x99 = (x98 >> 0x1a);
+ uint32_t x100 = ((uint32_t)x98 & 0x3ffffff);
+ uint64_t x101 = ((x94 + x85) + x99);
+ uint64_t x102 = (x101 >> 0x1a);
+ uint32_t x103 = ((uint32_t)x101 & 0x3ffffff);
+ uint64_t x104 = (x93 + x99);
+ uint64_t x105 = (x104 >> 0x1a);
+ uint32_t x106 = ((uint32_t)x104 & 0x3ffffff);
+ uint64_t x107 = (x102 + x84);
+ uint64_t x108 = (x107 >> 0x1a);
+ uint32_t x109 = ((uint32_t)x107 & 0x3ffffff);
+ uint64_t x110 = (x105 + x92);
+ uint64_t x111 = (x110 >> 0x1a);
+ uint32_t x112 = ((uint32_t)x110 & 0x3ffffff);
+ uint64_t x113 = (x108 + x83);
+ uint64_t x114 = (x113 >> 0x1a);
+ uint32_t x115 = ((uint32_t)x113 & 0x3ffffff);
+ uint64_t x116 = (x111 + x91);
+ uint64_t x117 = (x116 >> 0x1a);
+ uint32_t x118 = ((uint32_t)x116 & 0x3ffffff);
+ uint64_t x119 = (x114 + x82);
+ uint64_t x120 = (x119 >> 0x1a);
+ uint32_t x121 = ((uint32_t)x119 & 0x3ffffff);
+ uint64_t x122 = (x117 + x90);
+ uint64_t x123 = (x122 >> 0x1a);
+ uint32_t x124 = ((uint32_t)x122 & 0x3ffffff);
+ uint64_t x125 = (x120 + x81);
+ uint64_t x126 = (x125 >> 0x1a);
+ uint32_t x127 = ((uint32_t)x125 & 0x3ffffff);
+ uint64_t x128 = (x123 + x89);
+ uint64_t x129 = (x128 >> 0x1a);
+ uint32_t x130 = ((uint32_t)x128 & 0x3ffffff);
+ uint64_t x131 = (x126 + x80);
+ uint64_t x132 = (x131 >> 0x1a);
+ uint32_t x133 = ((uint32_t)x131 & 0x3ffffff);
+ uint64_t x134 = (x129 + x88);
+ uint64_t x135 = (x134 >> 0x1a);
+ uint32_t x136 = ((uint32_t)x134 & 0x3ffffff);
+ uint64_t x137 = (x132 + x79);
+ uint64_t x138 = (x137 >> 0x1a);
+ uint32_t x139 = ((uint32_t)x137 & 0x3ffffff);
+ uint64_t x140 = (x135 + x87);
+ uint64_t x141 = (x140 >> 0x1a);
+ uint32_t x142 = ((uint32_t)x140 & 0x3ffffff);
+ uint64_t x143 = (x138 + x100);
+ uint32_t x144 = (uint32_t) (x143 >> 0x1a);
+ uint32_t x145 = ((uint32_t)x143 & 0x3ffffff);
+ uint64_t x146 = (x141 + x95);
+ uint32_t x147 = (uint32_t) (x146 >> 0x1a);
+ uint32_t x148 = ((uint32_t)x146 & 0x3ffffff);
+ uint64_t x149 = (((uint64_t)0x4000000 * x144) + x145);
+ uint32_t x150 = (uint32_t) (x149 >> 0x1a);
+ uint32_t x151 = ((uint32_t)x149 & 0x3ffffff);
+ uint32_t x152 = ((x147 + x103) + x150);
+ uint32_t x153 = (x152 >> 0x1a);
+ uint32_t x154 = (x152 & 0x3ffffff);
+ uint32_t x155 = (x106 + x150);
+ uint32_t x156 = (x155 >> 0x1a);
+ uint32_t x157 = (x155 & 0x3ffffff);
+ return (Return x151, Return x139, Return x133, Return x127, Return x121, Return x115, (x153 + x109), Return x154, Return x148, Return x142, Return x136, Return x130, Return x124, Return x118, (x156 + x112), Return x157))
+(x, x0)%core
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e416m2e208m1/fesquare.c b/src/Specific/solinas32_2e416m2e208m1/fesquare.c
new file mode 100644
index 000000000..0b436503e
--- /dev/null
+++ b/src/Specific/solinas32_2e416m2e208m1/fesquare.c
@@ -0,0 +1,131 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type requested through the TI machine mode; fesquare below only uses uint64_t/uint32_t
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded by the second clause
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name to the GCC builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same aliasing for the 64-bit variant
+#endif
+
+#undef force_inline // discard any earlier definition (fesquare.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // attribute asking the compiler to always inline the function it decorates
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 16-word input (x29 ... x2, same order as the tuple in fesquareDisplay.log) and stores 16 words in out[0..15]; NOTE(review): directory name suggests arithmetic modulo 2^416 - 2^208 - 1 - confirm against the generator
+{ uint64_t x31 = (((uint64_t)(x16 + x29) * (x16 + x29)) - ((uint64_t)x16 * x16)); // x31..x45: products of the pairwise sums (one word from x2..x16 plus one from x18..x29) minus the products of the x2..x16 words alone
+{ uint64_t x32 = ((((uint64_t)(x14 + x30) * (x16 + x29)) + ((uint64_t)(x16 + x29) * (x14 + x30))) - (((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)));
+{ uint64_t x33 = ((((uint64_t)(x12 + x28) * (x16 + x29)) + (((uint64_t)(x14 + x30) * (x14 + x30)) + ((uint64_t)(x16 + x29) * (x12 + x28)))) - (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))));
+{ uint64_t x34 = ((((uint64_t)(x10 + x26) * (x16 + x29)) + (((uint64_t)(x12 + x28) * (x14 + x30)) + (((uint64_t)(x14 + x30) * (x12 + x28)) + ((uint64_t)(x16 + x29) * (x10 + x26))))) - (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))));
+{ uint64_t x35 = ((((uint64_t)(x8 + x24) * (x16 + x29)) + (((uint64_t)(x10 + x26) * (x14 + x30)) + (((uint64_t)(x12 + x28) * (x12 + x28)) + (((uint64_t)(x14 + x30) * (x10 + x26)) + ((uint64_t)(x16 + x29) * (x8 + x24)))))) - (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))));
+{ uint64_t x36 = ((((uint64_t)(x6 + x22) * (x16 + x29)) + (((uint64_t)(x8 + x24) * (x14 + x30)) + (((uint64_t)(x10 + x26) * (x12 + x28)) + (((uint64_t)(x12 + x28) * (x10 + x26)) + (((uint64_t)(x14 + x30) * (x8 + x24)) + ((uint64_t)(x16 + x29) * (x6 + x22))))))) - (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))));
+{ uint64_t x37 = ((((uint64_t)(x4 + x20) * (x16 + x29)) + (((uint64_t)(x6 + x22) * (x14 + x30)) + (((uint64_t)(x8 + x24) * (x12 + x28)) + (((uint64_t)(x10 + x26) * (x10 + x26)) + (((uint64_t)(x12 + x28) * (x8 + x24)) + (((uint64_t)(x14 + x30) * (x6 + x22)) + ((uint64_t)(x16 + x29) * (x4 + x20)))))))) - (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))));
+{ uint64_t x38 = ((((uint64_t)(x2 + x18) * (x16 + x29)) + (((uint64_t)(x4 + x20) * (x14 + x30)) + (((uint64_t)(x6 + x22) * (x12 + x28)) + (((uint64_t)(x8 + x24) * (x10 + x26)) + (((uint64_t)(x10 + x26) * (x8 + x24)) + (((uint64_t)(x12 + x28) * (x6 + x22)) + (((uint64_t)(x14 + x30) * (x4 + x20)) + ((uint64_t)(x16 + x29) * (x2 + x18))))))))) - (((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))));
+{ uint64_t x39 = ((((uint64_t)(x2 + x18) * (x14 + x30)) + (((uint64_t)(x4 + x20) * (x12 + x28)) + (((uint64_t)(x6 + x22) * (x10 + x26)) + (((uint64_t)(x8 + x24) * (x8 + x24)) + (((uint64_t)(x10 + x26) * (x6 + x22)) + (((uint64_t)(x12 + x28) * (x4 + x20)) + ((uint64_t)(x14 + x30) * (x2 + x18)))))))) - (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))));
+{ uint64_t x40 = ((((uint64_t)(x2 + x18) * (x12 + x28)) + (((uint64_t)(x4 + x20) * (x10 + x26)) + (((uint64_t)(x6 + x22) * (x8 + x24)) + (((uint64_t)(x8 + x24) * (x6 + x22)) + (((uint64_t)(x10 + x26) * (x4 + x20)) + ((uint64_t)(x12 + x28) * (x2 + x18))))))) - (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))));
+{ uint64_t x41 = ((((uint64_t)(x2 + x18) * (x10 + x26)) + (((uint64_t)(x4 + x20) * (x8 + x24)) + (((uint64_t)(x6 + x22) * (x6 + x22)) + (((uint64_t)(x8 + x24) * (x4 + x20)) + ((uint64_t)(x10 + x26) * (x2 + x18)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
+{ uint64_t x42 = ((((uint64_t)(x2 + x18) * (x8 + x24)) + (((uint64_t)(x4 + x20) * (x6 + x22)) + (((uint64_t)(x6 + x22) * (x4 + x20)) + ((uint64_t)(x8 + x24) * (x2 + x18))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+{ uint64_t x43 = ((((uint64_t)(x2 + x18) * (x6 + x22)) + (((uint64_t)(x4 + x20) * (x4 + x20)) + ((uint64_t)(x6 + x22) * (x2 + x18)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+{ uint64_t x44 = ((((uint64_t)(x2 + x18) * (x4 + x20)) + ((uint64_t)(x4 + x20) * (x2 + x18))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+{ uint64_t x45 = (((uint64_t)(x2 + x18) * (x2 + x18)) - ((uint64_t)x2 * x2));
+{ uint64_t x46 = (((((uint64_t)x16 * x16) + ((uint64_t)x29 * x29)) + x39) + x31); // x46..x60: per-position accumulators built from products among x2..x16, products among x18..x29, and the x31..x45 terms above
+{ uint64_t x47 = ((((((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)) + (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))) + x40) + x32);
+{ uint64_t x48 = ((((((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))) + (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))) + x41) + x33);
+{ uint64_t x49 = ((((((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))) + (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))) + x42) + x34);
+{ uint64_t x50 = ((((((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))) + (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))) + x43) + x35);
+{ uint64_t x51 = ((((((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))) + x44) + x36);
+{ uint64_t x52 = ((((((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))) + x45) + x37);
+{ uint64_t x53 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18)))))))));
+{ uint64_t x54 = (((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))) + x31);
+{ uint64_t x55 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))) + x32);
+{ uint64_t x56 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x26 * x18)))))) + x33);
+{ uint64_t x57 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x24 * x18))))) + x34);
+{ uint64_t x58 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + ((uint64_t)x22 * x18)))) + x35);
+{ uint64_t x59 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x18 * x20) + ((uint64_t)x20 * x18))) + x36);
+{ uint64_t x60 = ((((uint64_t)x2 * x2) + ((uint64_t)x18 * x18)) + x37);
+{ uint64_t x61 = (x53 >> 0x1a); // carry propagation starts here: 0x1a is 26 and 0x3ffffff is 2^26 - 1, so each pair of lines splits a value into the part above bit 26 and its low 26 bits
+{ uint32_t x62 = ((uint32_t)x53 & 0x3ffffff);
+{ uint64_t x63 = (x38 >> 0x1a);
+{ uint32_t x64 = ((uint32_t)x38 & 0x3ffffff);
+{ uint64_t x65 = ((0x4000000 * x63) + x64); // 0x4000000 is 2^26: reassembles the two halves just split off x38
+{ uint64_t x66 = (x65 >> 0x1a);
+{ uint32_t x67 = ((uint32_t)x65 & 0x3ffffff);
+{ uint64_t x68 = ((x61 + x52) + x66);
+{ uint64_t x69 = (x68 >> 0x1a);
+{ uint32_t x70 = ((uint32_t)x68 & 0x3ffffff);
+{ uint64_t x71 = (x60 + x66);
+{ uint64_t x72 = (x71 >> 0x1a);
+{ uint32_t x73 = ((uint32_t)x71 & 0x3ffffff);
+{ uint64_t x74 = (x69 + x51);
+{ uint64_t x75 = (x74 >> 0x1a);
+{ uint32_t x76 = ((uint32_t)x74 & 0x3ffffff);
+{ uint64_t x77 = (x72 + x59);
+{ uint64_t x78 = (x77 >> 0x1a);
+{ uint32_t x79 = ((uint32_t)x77 & 0x3ffffff);
+{ uint64_t x80 = (x75 + x50);
+{ uint64_t x81 = (x80 >> 0x1a);
+{ uint32_t x82 = ((uint32_t)x80 & 0x3ffffff);
+{ uint64_t x83 = (x78 + x58);
+{ uint64_t x84 = (x83 >> 0x1a);
+{ uint32_t x85 = ((uint32_t)x83 & 0x3ffffff);
+{ uint64_t x86 = (x81 + x49);
+{ uint64_t x87 = (x86 >> 0x1a);
+{ uint32_t x88 = ((uint32_t)x86 & 0x3ffffff);
+{ uint64_t x89 = (x84 + x57);
+{ uint64_t x90 = (x89 >> 0x1a);
+{ uint32_t x91 = ((uint32_t)x89 & 0x3ffffff);
+{ uint64_t x92 = (x87 + x48);
+{ uint64_t x93 = (x92 >> 0x1a);
+{ uint32_t x94 = ((uint32_t)x92 & 0x3ffffff);
+{ uint64_t x95 = (x90 + x56);
+{ uint64_t x96 = (x95 >> 0x1a);
+{ uint32_t x97 = ((uint32_t)x95 & 0x3ffffff);
+{ uint64_t x98 = (x93 + x47);
+{ uint64_t x99 = (x98 >> 0x1a);
+{ uint32_t x100 = ((uint32_t)x98 & 0x3ffffff);
+{ uint64_t x101 = (x96 + x55);
+{ uint64_t x102 = (x101 >> 0x1a);
+{ uint32_t x103 = ((uint32_t)x101 & 0x3ffffff);
+{ uint64_t x104 = (x99 + x46);
+{ uint64_t x105 = (x104 >> 0x1a);
+{ uint32_t x106 = ((uint32_t)x104 & 0x3ffffff);
+{ uint64_t x107 = (x102 + x54);
+{ uint64_t x108 = (x107 >> 0x1a);
+{ uint32_t x109 = ((uint32_t)x107 & 0x3ffffff);
+{ uint64_t x110 = (x105 + x67);
+{ uint32_t x111 = (uint32_t) (x110 >> 0x1a);
+{ uint32_t x112 = ((uint32_t)x110 & 0x3ffffff);
+{ uint64_t x113 = (x108 + x62);
+{ uint32_t x114 = (uint32_t) (x113 >> 0x1a);
+{ uint32_t x115 = ((uint32_t)x113 & 0x3ffffff);
+{ uint64_t x116 = (((uint64_t)0x4000000 * x111) + x112); // reassembles x111 and x112, the two parts split off x110
+{ uint32_t x117 = (uint32_t) (x116 >> 0x1a);
+{ uint32_t x118 = ((uint32_t)x116 & 0x3ffffff);
+{ uint32_t x119 = ((x114 + x70) + x117);
+{ uint32_t x120 = (x119 >> 0x1a);
+{ uint32_t x121 = (x119 & 0x3ffffff);
+{ uint32_t x122 = (x73 + x117);
+{ uint32_t x123 = (x122 >> 0x1a);
+{ uint32_t x124 = (x122 & 0x3ffffff);
+out[0] = x118; // out[0..15] are stored in the same order as the return tuple printed in fesquareDisplay.log
+out[1] = x106;
+out[2] = x100;
+out[3] = x94;
+out[4] = x88;
+out[5] = x82;
+out[6] = x120 + x76; // final carry x120 is added to x76 without another mask
+out[7] = x121;
+out[8] = x115;
+out[9] = x109;
+out[10] = x103;
+out[11] = x97;
+out[12] = x91;
+out[13] = x85;
+out[14] = x123 + x79; // final carry x123 is added to x79 without another mask
+out[15] = x124;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e416m2e208m1/fesquare.h b/src/Specific/solinas32_2e416m2e208m1/fesquare.h
new file mode 100644
index 000000000..c86247b3d
--- /dev/null
+++ b/src/Specific/solinas32_2e416m2e208m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition before redefining it below
+#define force_inline __attribute__((always_inline)) // attribute asking the compiler to always inline the decorated function
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype of the squaring routine in fesquare.c: 16 input words passed by value, 16 result words written through out
diff --git a/src/Specific/solinas32_2e416m2e208m1/fesquareDisplay.log b/src/Specific/solinas32_2e416m2e208m1/fesquareDisplay.log
new file mode 100644
index 000000000..53d66e31c
--- /dev/null
+++ b/src/Specific/solinas32_2e416m2e208m1/fesquareDisplay.log
@@ -0,0 +1,101 @@
+λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ uint64_t x31 = (((uint64_t)(x16 + x29) * (x16 + x29)) - ((uint64_t)x16 * x16));
+ uint64_t x32 = ((((uint64_t)(x14 + x30) * (x16 + x29)) + ((uint64_t)(x16 + x29) * (x14 + x30))) - (((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)));
+ uint64_t x33 = ((((uint64_t)(x12 + x28) * (x16 + x29)) + (((uint64_t)(x14 + x30) * (x14 + x30)) + ((uint64_t)(x16 + x29) * (x12 + x28)))) - (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))));
+ uint64_t x34 = ((((uint64_t)(x10 + x26) * (x16 + x29)) + (((uint64_t)(x12 + x28) * (x14 + x30)) + (((uint64_t)(x14 + x30) * (x12 + x28)) + ((uint64_t)(x16 + x29) * (x10 + x26))))) - (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))));
+ uint64_t x35 = ((((uint64_t)(x8 + x24) * (x16 + x29)) + (((uint64_t)(x10 + x26) * (x14 + x30)) + (((uint64_t)(x12 + x28) * (x12 + x28)) + (((uint64_t)(x14 + x30) * (x10 + x26)) + ((uint64_t)(x16 + x29) * (x8 + x24)))))) - (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))));
+ uint64_t x36 = ((((uint64_t)(x6 + x22) * (x16 + x29)) + (((uint64_t)(x8 + x24) * (x14 + x30)) + (((uint64_t)(x10 + x26) * (x12 + x28)) + (((uint64_t)(x12 + x28) * (x10 + x26)) + (((uint64_t)(x14 + x30) * (x8 + x24)) + ((uint64_t)(x16 + x29) * (x6 + x22))))))) - (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))));
+ uint64_t x37 = ((((uint64_t)(x4 + x20) * (x16 + x29)) + (((uint64_t)(x6 + x22) * (x14 + x30)) + (((uint64_t)(x8 + x24) * (x12 + x28)) + (((uint64_t)(x10 + x26) * (x10 + x26)) + (((uint64_t)(x12 + x28) * (x8 + x24)) + (((uint64_t)(x14 + x30) * (x6 + x22)) + ((uint64_t)(x16 + x29) * (x4 + x20)))))))) - (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))));
+ uint64_t x38 = ((((uint64_t)(x2 + x18) * (x16 + x29)) + (((uint64_t)(x4 + x20) * (x14 + x30)) + (((uint64_t)(x6 + x22) * (x12 + x28)) + (((uint64_t)(x8 + x24) * (x10 + x26)) + (((uint64_t)(x10 + x26) * (x8 + x24)) + (((uint64_t)(x12 + x28) * (x6 + x22)) + (((uint64_t)(x14 + x30) * (x4 + x20)) + ((uint64_t)(x16 + x29) * (x2 + x18))))))))) - (((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))));
+ uint64_t x39 = ((((uint64_t)(x2 + x18) * (x14 + x30)) + (((uint64_t)(x4 + x20) * (x12 + x28)) + (((uint64_t)(x6 + x22) * (x10 + x26)) + (((uint64_t)(x8 + x24) * (x8 + x24)) + (((uint64_t)(x10 + x26) * (x6 + x22)) + (((uint64_t)(x12 + x28) * (x4 + x20)) + ((uint64_t)(x14 + x30) * (x2 + x18)))))))) - (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))));
+ uint64_t x40 = ((((uint64_t)(x2 + x18) * (x12 + x28)) + (((uint64_t)(x4 + x20) * (x10 + x26)) + (((uint64_t)(x6 + x22) * (x8 + x24)) + (((uint64_t)(x8 + x24) * (x6 + x22)) + (((uint64_t)(x10 + x26) * (x4 + x20)) + ((uint64_t)(x12 + x28) * (x2 + x18))))))) - (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))));
+ uint64_t x41 = ((((uint64_t)(x2 + x18) * (x10 + x26)) + (((uint64_t)(x4 + x20) * (x8 + x24)) + (((uint64_t)(x6 + x22) * (x6 + x22)) + (((uint64_t)(x8 + x24) * (x4 + x20)) + ((uint64_t)(x10 + x26) * (x2 + x18)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
+ uint64_t x42 = ((((uint64_t)(x2 + x18) * (x8 + x24)) + (((uint64_t)(x4 + x20) * (x6 + x22)) + (((uint64_t)(x6 + x22) * (x4 + x20)) + ((uint64_t)(x8 + x24) * (x2 + x18))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+ uint64_t x43 = ((((uint64_t)(x2 + x18) * (x6 + x22)) + (((uint64_t)(x4 + x20) * (x4 + x20)) + ((uint64_t)(x6 + x22) * (x2 + x18)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+ uint64_t x44 = ((((uint64_t)(x2 + x18) * (x4 + x20)) + ((uint64_t)(x4 + x20) * (x2 + x18))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+ uint64_t x45 = (((uint64_t)(x2 + x18) * (x2 + x18)) - ((uint64_t)x2 * x2));
+ uint64_t x46 = (((((uint64_t)x16 * x16) + ((uint64_t)x29 * x29)) + x39) + x31);
+ uint64_t x47 = ((((((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)) + (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))) + x40) + x32);
+ uint64_t x48 = ((((((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))) + (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))) + x41) + x33);
+ uint64_t x49 = ((((((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))) + (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))) + x42) + x34);
+ uint64_t x50 = ((((((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))) + (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))) + x43) + x35);
+ uint64_t x51 = ((((((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))) + x44) + x36);
+ uint64_t x52 = ((((((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))) + x45) + x37);
+ uint64_t x53 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18)))))))));
+ uint64_t x54 = (((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))) + x31);
+ uint64_t x55 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))) + x32);
+ uint64_t x56 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x26 * x18)))))) + x33);
+ uint64_t x57 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x24 * x18))))) + x34);
+ uint64_t x58 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + ((uint64_t)x22 * x18)))) + x35);
+ uint64_t x59 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x18 * x20) + ((uint64_t)x20 * x18))) + x36);
+ uint64_t x60 = ((((uint64_t)x2 * x2) + ((uint64_t)x18 * x18)) + x37);
+ uint64_t x61 = (x53 >> 0x1a);
+ uint32_t x62 = ((uint32_t)x53 & 0x3ffffff);
+ uint64_t x63 = (x38 >> 0x1a);
+ uint32_t x64 = ((uint32_t)x38 & 0x3ffffff);
+ uint64_t x65 = ((0x4000000 * x63) + x64);
+ uint64_t x66 = (x65 >> 0x1a);
+ uint32_t x67 = ((uint32_t)x65 & 0x3ffffff);
+ uint64_t x68 = ((x61 + x52) + x66);
+ uint64_t x69 = (x68 >> 0x1a);
+ uint32_t x70 = ((uint32_t)x68 & 0x3ffffff);
+ uint64_t x71 = (x60 + x66);
+ uint64_t x72 = (x71 >> 0x1a);
+ uint32_t x73 = ((uint32_t)x71 & 0x3ffffff);
+ uint64_t x74 = (x69 + x51);
+ uint64_t x75 = (x74 >> 0x1a);
+ uint32_t x76 = ((uint32_t)x74 & 0x3ffffff);
+ uint64_t x77 = (x72 + x59);
+ uint64_t x78 = (x77 >> 0x1a);
+ uint32_t x79 = ((uint32_t)x77 & 0x3ffffff);
+ uint64_t x80 = (x75 + x50);
+ uint64_t x81 = (x80 >> 0x1a);
+ uint32_t x82 = ((uint32_t)x80 & 0x3ffffff);
+ uint64_t x83 = (x78 + x58);
+ uint64_t x84 = (x83 >> 0x1a);
+ uint32_t x85 = ((uint32_t)x83 & 0x3ffffff);
+ uint64_t x86 = (x81 + x49);
+ uint64_t x87 = (x86 >> 0x1a);
+ uint32_t x88 = ((uint32_t)x86 & 0x3ffffff);
+ uint64_t x89 = (x84 + x57);
+ uint64_t x90 = (x89 >> 0x1a);
+ uint32_t x91 = ((uint32_t)x89 & 0x3ffffff);
+ uint64_t x92 = (x87 + x48);
+ uint64_t x93 = (x92 >> 0x1a);
+ uint32_t x94 = ((uint32_t)x92 & 0x3ffffff);
+ uint64_t x95 = (x90 + x56);
+ uint64_t x96 = (x95 >> 0x1a);
+ uint32_t x97 = ((uint32_t)x95 & 0x3ffffff);
+ uint64_t x98 = (x93 + x47);
+ uint64_t x99 = (x98 >> 0x1a);
+ uint32_t x100 = ((uint32_t)x98 & 0x3ffffff);
+ uint64_t x101 = (x96 + x55);
+ uint64_t x102 = (x101 >> 0x1a);
+ uint32_t x103 = ((uint32_t)x101 & 0x3ffffff);
+ uint64_t x104 = (x99 + x46);
+ uint64_t x105 = (x104 >> 0x1a);
+ uint32_t x106 = ((uint32_t)x104 & 0x3ffffff);
+ uint64_t x107 = (x102 + x54);
+ uint64_t x108 = (x107 >> 0x1a);
+ uint32_t x109 = ((uint32_t)x107 & 0x3ffffff);
+ uint64_t x110 = (x105 + x67);
+ uint32_t x111 = (uint32_t) (x110 >> 0x1a);
+ uint32_t x112 = ((uint32_t)x110 & 0x3ffffff);
+ uint64_t x113 = (x108 + x62);
+ uint32_t x114 = (uint32_t) (x113 >> 0x1a);
+ uint32_t x115 = ((uint32_t)x113 & 0x3ffffff);
+ uint64_t x116 = (((uint64_t)0x4000000 * x111) + x112);
+ uint32_t x117 = (uint32_t) (x116 >> 0x1a);
+ uint32_t x118 = ((uint32_t)x116 & 0x3ffffff);
+ uint32_t x119 = ((x114 + x70) + x117);
+ uint32_t x120 = (x119 >> 0x1a);
+ uint32_t x121 = (x119 & 0x3ffffff);
+ uint32_t x122 = (x73 + x117);
+ uint32_t x123 = (x122 >> 0x1a);
+ uint32_t x124 = (x122 & 0x3ffffff);
+ return (Return x118, Return x106, Return x100, Return x94, Return x88, Return x82, (x120 + x76), Return x121, Return x115, Return x109, Return x103, Return x97, Return x91, Return x85, (x123 + x79), Return x124))
+x
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e416m2e208m1/freeze.c b/src/Specific/solinas32_2e416m2e208m1/freeze.c
new file mode 100644
index 000000000..5de4dcf81
--- /dev/null
+++ b/src/Specific/solinas32_2e416m2e208m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type requested through the TI machine mode
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER)) // GCC proper only: clang and the Intel compiler are excluded by the second clause
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32 // route the 32-bit subtract-with-borrow intrinsic name to the GCC builtin
+#define _subborrow_u64 __builtin_ia32_sbb_u64 // same aliasing for the 64-bit variant
+#endif
+
+#undef force_inline // discard any earlier definition (freeze.h defines the same macro)
+#define force_inline __attribute__((always_inline)) // attribute asking the compiler to always inline the function it decorates
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the emitted body is not compilable C - the opening '{' is missing and the statements still contain generator syntax; it looks like the C printer could not render the two-result SubWithGetBorrow/AddWithGetCarry operations shown in freezeDisplay.log, so regenerate rather than hand-edit
+out[0] = uint32_t x32; // a declaration ("uint32_t x32") appears where an expression is required; x32 is not a parameter of this function
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 26 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // raw "Op Syntax.SubWithGetBorrow ..." term left over from the generator, not C
+out[2] = x2;
+out[3] = 0x3ffffff;; // doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e416m2e208m1/freeze.h b/src/Specific/solinas32_2e416m2e208m1/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e416m2e208m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition before redefining it below
+#define force_inline __attribute__((always_inline)) // attribute asking the compiler to always inline the decorated function
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype of freeze() from freeze.c: 16 input words by value, results written through out; NOTE(review): the freeze.c body in this commit is malformed, so the number of words written cannot be confirmed from it
diff --git a/src/Specific/solinas32_2e416m2e208m1/freezeDisplay.log b/src/Specific/solinas32_2e416m2e208m1/freezeDisplay.log
index 7cd934e0d..58a5111f0 100644
--- a/src/Specific/solinas32_2e416m2e208m1/freezeDisplay.log
+++ b/src/Specific/solinas32_2e416m2e208m1/freezeDisplay.log
@@ -19,37 +19,37 @@ Interp-η
uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0x3ffffff);
uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0x3ffffff);
uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
- uint32_t x80 = x79 & 0x3ffffff;
+ uint32_t x80 = (x79 & 0x3ffffff);
uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
- uint32_t x84 = x79 & 0x3ffffff;
+ uint32_t x84 = (x79 & 0x3ffffff);
uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
- uint32_t x88 = x79 & 0x3ffffff;
+ uint32_t x88 = (x79 & 0x3ffffff);
uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
- uint32_t x92 = x79 & 0x3ffffff;
+ uint32_t x92 = (x79 & 0x3ffffff);
uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
- uint32_t x96 = x79 & 0x3ffffff;
+ uint32_t x96 = (x79 & 0x3ffffff);
uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
- uint32_t x100 = x79 & 0x3ffffff;
+ uint32_t x100 = (x79 & 0x3ffffff);
uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
- uint32_t x104 = x79 & 0x3ffffff;
+ uint32_t x104 = (x79 & 0x3ffffff);
uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
- uint32_t x108 = x79 & 0x3ffffff;
+ uint32_t x108 = (x79 & 0x3ffffff);
uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
- uint32_t x112 = x79 & 0x3fffffe;
+ uint32_t x112 = (x79 & 0x3fffffe);
uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
- uint32_t x116 = x79 & 0x3ffffff;
+ uint32_t x116 = (x79 & 0x3ffffff);
uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
- uint32_t x120 = x79 & 0x3ffffff;
+ uint32_t x120 = (x79 & 0x3ffffff);
uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
- uint32_t x124 = x79 & 0x3ffffff;
+ uint32_t x124 = (x79 & 0x3ffffff);
uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
- uint32_t x128 = x79 & 0x3ffffff;
+ uint32_t x128 = (x79 & 0x3ffffff);
uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
- uint32_t x132 = x79 & 0x3ffffff;
+ uint32_t x132 = (x79 & 0x3ffffff);
uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
- uint32_t x136 = x79 & 0x3ffffff;
+ uint32_t x136 = (x79 & 0x3ffffff);
uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
- uint32_t x140 = x79 & 0x3ffffff;
+ uint32_t x140 = (x79 & 0x3ffffff);
uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
(Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
x
diff --git a/src/Specific/solinas32_2e448m2e224m1/femul.c b/src/Specific/solinas32_2e448m2e224m1/femul.c
new file mode 100644
index 000000000..3325c3e94
--- /dev/null
+++ b/src/Specific/solinas32_2e448m2e224m1/femul.c
@@ -0,0 +1,131 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // unsigned integer in GCC's TImode (16 bytes wide); the femul definition in this file does not reference it
+// On GCC proper (clang and ICC are excluded by the test below), point the _subborrow_u32/_u64 names at the __builtin_ia32_sbb_* builtins.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and bind the name to GCC's always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // femul: combines two 16-word operands passed by value (x32..x5 and x62..x35) and stores 16 result words in out[0]..out[15]; presumably multiplication modulo 2^448 - 2^224 - 1, going by the directory name -- TODO confirm
+{ uint64_t x64 = (((uint64_t)(x19 + x32) * (x49 + x62)) - ((uint64_t)x19 * x49)); // x64..x78: each is a sum of (xi + xj) * (xk + xl) products minus the matching xi * xk products
+{ uint64_t x65 = ((((uint64_t)(x17 + x33) * (x49 + x62)) + ((uint64_t)(x19 + x32) * (x47 + x63))) - (((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)));
+{ uint64_t x66 = ((((uint64_t)(x15 + x31) * (x49 + x62)) + (((uint64_t)(x17 + x33) * (x47 + x63)) + ((uint64_t)(x19 + x32) * (x45 + x61)))) - (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))));
+{ uint64_t x67 = ((((uint64_t)(x13 + x29) * (x49 + x62)) + (((uint64_t)(x15 + x31) * (x47 + x63)) + (((uint64_t)(x17 + x33) * (x45 + x61)) + ((uint64_t)(x19 + x32) * (x43 + x59))))) - (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))));
+{ uint64_t x68 = ((((uint64_t)(x11 + x27) * (x49 + x62)) + (((uint64_t)(x13 + x29) * (x47 + x63)) + (((uint64_t)(x15 + x31) * (x45 + x61)) + (((uint64_t)(x17 + x33) * (x43 + x59)) + ((uint64_t)(x19 + x32) * (x41 + x57)))))) - (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))));
+{ uint64_t x69 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x9 + x25) * (x49 + x62)) +ℤ (((uint64_t)(x11 + x27) * (x47 + x63)) + (((uint64_t)(x13 + x29) * (x45 + x61)) + (((uint64_t)(x15 + x31) * (x43 + x59)) + (((uint64_t)(x17 + x33) * (x41 + x57)) + ((uint64_t)(x19 + x32) * (x39 + x55))))))) -ℤ (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39))))))), (((((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + ((uint64_t)x19 * x55)))))) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55)))))))); // NOTE(review): Op (Syntax.IdWithAlt ...), the ℤ type and the +ℤ / -ℤ / *ℤ operators (here and below) are not C syntax; this looks like un-lowered generator output, so the file cannot compile as written. The second argument appears to be the first one expanded into plain products -- confirm against the generator
+{ uint64_t x70 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x7 + x23) * (x49 + x62)) +ℤ (((uint64_t)(x9 + x25) * (x47 + x63)) +ℤ (((uint64_t)(x11 + x27) * (x45 + x61)) + (((uint64_t)(x13 + x29) * (x43 + x59)) + (((uint64_t)(x15 + x31) * (x41 + x57)) + (((uint64_t)(x17 + x33) * (x39 + x55)) + ((uint64_t)(x19 + x32) * (x37 + x53)))))))) -ℤ (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37)))))))), (((((uint64_t)x7 * x62) + (((uint64_t)x9 * x63) + (((uint64_t)x11 * x61) + (((uint64_t)x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)x17 * x55) + ((uint64_t)x19 * x53))))))) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + (((uint64_t)x31 * x41) + (((uint64_t)x33 * x39) + ((uint64_t)x32 * x37)))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+{ ℤ x71 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)(x5 + x21) * (x49 + x62)) +ℤ (((uint64_t)(x7 + x23) * (x47 + x63)) +ℤ (((uint64_t)(x9 + x25) * (x45 + x61)) +ℤ (((uint64_t)(x11 + x27) * (x43 + x59)) + (((uint64_t)(x13 + x29) * (x41 + x57)) + (((uint64_t)(x15 + x31) * (x39 + x55)) + (((uint64_t)(x17 + x33) * (x37 + x53)) + ((uint64_t)(x19 + x32) * (x35 + x51))))))))) -ℤ (((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35))))))))), (((((uint64_t)x5 * x62) + (((uint64_t)x7 * x63) + (((uint64_t)x9 * x61) + (((uint64_t)x11 * x59) + (((uint64_t)x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + ((uint64_t)x19 * x51)))))))) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + (((uint64_t)x31 * x39) + (((uint64_t)x33 * x37) + ((uint64_t)x32 * x35))))))))) +ℤ (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+{ uint64_t x72 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x5 + x21) * (x47 + x63)) +ℤ (((uint64_t)(x7 + x23) * (x45 + x61)) +ℤ (((uint64_t)(x9 + x25) * (x43 + x59)) + (((uint64_t)(x11 + x27) * (x41 + x57)) + (((uint64_t)(x13 + x29) * (x39 + x55)) + (((uint64_t)(x15 + x31) * (x37 + x53)) + ((uint64_t)(x17 + x33) * (x35 + x51)))))))) -ℤ (((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35)))))))), (((((uint64_t)x5 * x63) + (((uint64_t)x7 * x61) + (((uint64_t)x9 * x59) + (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + ((uint64_t)x17 * x51))))))) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + (((uint64_t)x31 * x37) + ((uint64_t)x33 * x35)))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))));
+{ uint64_t x73 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x5 + x21) * (x45 + x61)) +ℤ (((uint64_t)(x7 + x23) * (x43 + x59)) + (((uint64_t)(x9 + x25) * (x41 + x57)) + (((uint64_t)(x11 + x27) * (x39 + x55)) + (((uint64_t)(x13 + x29) * (x37 + x53)) + ((uint64_t)(x15 + x31) * (x35 + x51))))))) -ℤ (((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35))))))), (((((uint64_t)x5 * x61) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + ((uint64_t)x15 * x51)))))) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x31 * x35))))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))));
+{ uint64_t x74 = ((((uint64_t)(x5 + x21) * (x43 + x59)) + (((uint64_t)(x7 + x23) * (x41 + x57)) + (((uint64_t)(x9 + x25) * (x39 + x55)) + (((uint64_t)(x11 + x27) * (x37 + x53)) + ((uint64_t)(x13 + x29) * (x35 + x51)))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))));
+{ uint64_t x75 = ((((uint64_t)(x5 + x21) * (x41 + x57)) + (((uint64_t)(x7 + x23) * (x39 + x55)) + (((uint64_t)(x9 + x25) * (x37 + x53)) + ((uint64_t)(x11 + x27) * (x35 + x51))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))));
+{ uint64_t x76 = ((((uint64_t)(x5 + x21) * (x39 + x55)) + (((uint64_t)(x7 + x23) * (x37 + x53)) + ((uint64_t)(x9 + x25) * (x35 + x51)))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))));
+{ uint64_t x77 = ((((uint64_t)(x5 + x21) * (x37 + x53)) + ((uint64_t)(x7 + x23) * (x35 + x51))) - (((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)));
+{ uint64_t x78 = (((uint64_t)(x5 + x21) * (x35 + x51)) - ((uint64_t)x5 * x35));
+{ ℤ x79 = (((((uint64_t)x19 * x49) + ((uint64_t)x32 * x62)) +ℤ x72) +ℤ x64); // x79..x85: two groups of plain products plus one term from x72..x78 and one from x64..x70
+{ ℤ x80 = ((((((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)) + (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))) + x73) +ℤ x65);
+{ ℤ x81 = ((((((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))) + (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))) + x74) +ℤ x66);
+{ ℤ x82 = ((((((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))) + (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))) + x75) +ℤ x67);
+{ ℤ x83 = ((((((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))) + (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))) + x76) +ℤ x68);
+{ ℤ x84 = ((((((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))) + x77) +ℤ x69);
+{ ℤ x85 = ((((((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))) + x78) +ℤ x70);
+{ uint64_t x86 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))) + (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))); // x86..x93: further sums of plain products; x87..x93 each also add one of x64..x70
+{ uint64_t x87 = (((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))) + x64);
+{ uint64_t x88 = (((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))) + x65);
+{ uint64_t x89 = (((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((uint64_t)x29 * x51)))))) + x66);
+{ uint64_t x90 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + ((uint64_t)x27 * x51))))) + x67);
+{ uint64_t x91 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + ((uint64_t)x25 * x51)))) + x68);
+{ uint64_t x92 = (((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (((uint64_t)x21 * x53) + ((uint64_t)x23 * x51))) + x69);
+{ ℤ x93 = ((((uint64_t)x5 * x35) + ((uint64_t)x21 * x51)) +ℤ x70);
+{ uint64_t x94 = (x86 >> 0x1c); // from here on each accumulator is split into bits 28 and up (>> 0x1c) and its low 28 bits (& 0xfffffff); the upper part is added into a later accumulator
+{ uint32_t x95 = ((uint32_t)x86 & 0xfffffff);
+{ uint64_t x96 = (x71 >> 0x1c);
+{ uint32_t x97 = (x71 & 0xfffffff);
+{ ℤ x98 = ((0x10000000 *ℤ x96) +ℤ x97); // x98 = x96 * 2^28 + x97, i.e. the two parts of x71 recombined; its upper part x99 is added into both x101 and x104
+{ uint64_t x99 = (x98 >> 0x1c);
+{ uint32_t x100 = (x98 & 0xfffffff);
+{ ℤ x101 = ((x94 +ℤ x85) +ℤ x99);
+{ uint64_t x102 = (x101 >> 0x1c);
+{ uint32_t x103 = (x101 & 0xfffffff);
+{ ℤ x104 = (x93 +ℤ x99);
+{ uint64_t x105 = (x104 >> 0x1c);
+{ uint32_t x106 = (x104 & 0xfffffff);
+{ ℤ x107 = (x102 +ℤ x84);
+{ uint64_t x108 = (x107 >> 0x1c);
+{ uint32_t x109 = (x107 & 0xfffffff);
+{ uint64_t x110 = (x105 + x92);
+{ uint64_t x111 = (x110 >> 0x1c);
+{ uint32_t x112 = ((uint32_t)x110 & 0xfffffff);
+{ ℤ x113 = (x108 +ℤ x83);
+{ uint64_t x114 = (x113 >> 0x1c);
+{ uint32_t x115 = (x113 & 0xfffffff);
+{ uint64_t x116 = (x111 + x91);
+{ uint64_t x117 = (x116 >> 0x1c);
+{ uint32_t x118 = ((uint32_t)x116 & 0xfffffff);
+{ ℤ x119 = (x114 +ℤ x82);
+{ uint64_t x120 = (x119 >> 0x1c);
+{ uint32_t x121 = (x119 & 0xfffffff);
+{ uint64_t x122 = (x117 + x90);
+{ uint64_t x123 = (x122 >> 0x1c);
+{ uint32_t x124 = ((uint32_t)x122 & 0xfffffff);
+{ ℤ x125 = (x120 +ℤ x81);
+{ uint64_t x126 = (x125 >> 0x1c);
+{ uint32_t x127 = (x125 & 0xfffffff);
+{ uint64_t x128 = (x123 + x89);
+{ uint64_t x129 = (x128 >> 0x1c);
+{ uint32_t x130 = ((uint32_t)x128 & 0xfffffff);
+{ ℤ x131 = (x126 +ℤ x80);
+{ uint64_t x132 = (x131 >> 0x1c);
+{ uint32_t x133 = (x131 & 0xfffffff);
+{ uint64_t x134 = (x129 + x88);
+{ uint64_t x135 = (x134 >> 0x1c);
+{ uint32_t x136 = ((uint32_t)x134 & 0xfffffff);
+{ ℤ x137 = (x132 +ℤ x79);
+{ uint64_t x138 = (x137 >> 0x1c);
+{ uint32_t x139 = (x137 & 0xfffffff);
+{ uint64_t x140 = (x135 + x87);
+{ uint64_t x141 = (x140 >> 0x1c);
+{ uint32_t x142 = ((uint32_t)x140 & 0xfffffff);
+{ uint64_t x143 = (x138 + x100);
+{ uint32_t x144 = (uint32_t) (x143 >> 0x1c);
+{ uint32_t x145 = ((uint32_t)x143 & 0xfffffff);
+{ uint64_t x146 = (x141 + x95);
+{ uint32_t x147 = (uint32_t) (x146 >> 0x1c);
+{ uint32_t x148 = ((uint32_t)x146 & 0xfffffff);
+{ uint64_t x149 = (((uint64_t)0x10000000 * x144) + x145); // x149 recombines x144 and x145 (the two parts of x143); its upper part x150 is added into both x152 and x155
+{ uint32_t x150 = (uint32_t) (x149 >> 0x1c);
+{ uint32_t x151 = ((uint32_t)x149 & 0xfffffff);
+{ uint32_t x152 = ((x147 + x103) + x150);
+{ uint32_t x153 = (x152 >> 0x1c);
+{ uint32_t x154 = (x152 & 0xfffffff);
+{ uint32_t x155 = (x106 + x150);
+{ uint32_t x156 = (x155 >> 0x1c);
+{ uint32_t x157 = (x155 & 0xfffffff);
+out[0] = x151; // store the 16 result words
+out[1] = x139;
+out[2] = x133;
+out[3] = x127;
+out[4] = x121;
+out[5] = x115;
+out[6] = x153 + x109; // x153 (upper part of x152) is added to x109 with no further masking
+out[7] = x154;
+out[8] = x148;
+out[9] = x142;
+out[10] = x136;
+out[11] = x130;
+out[12] = x124;
+out[13] = x118;
+out[14] = x156 + x112; // likewise x156 (upper part of x155) is added to x112
+out[15] = x157;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16]; (femul writes every element, out[0] through out[15])
diff --git a/src/Specific/solinas32_2e448m2e224m1/femul.h b/src/Specific/solinas32_2e448m2e224m1/femul.h
new file mode 100644
index 000000000..c4089fc7d
--- /dev/null
+++ b/src/Specific/solinas32_2e448m2e224m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: an output pointer followed by 32 by-value uint64_t arguments (two groups of 16); the definition in femul.c stores results in out[0]..out[15].
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35);
diff --git a/src/Specific/solinas32_2e448m2e224m1/femulDisplay.log b/src/Specific/solinas32_2e448m2e224m1/femulDisplay.log
new file mode 100644
index 000000000..f2ea9e588
--- /dev/null
+++ b/src/Specific/solinas32_2e448m2e224m1/femulDisplay.log
@@ -0,0 +1,101 @@
+λ x x0 : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
+ uint64_t x64 = (((uint64_t)(x19 + x32) * (x49 + x62)) - ((uint64_t)x19 * x49));
+ uint64_t x65 = ((((uint64_t)(x17 + x33) * (x49 + x62)) + ((uint64_t)(x19 + x32) * (x47 + x63))) - (((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)));
+ uint64_t x66 = ((((uint64_t)(x15 + x31) * (x49 + x62)) + (((uint64_t)(x17 + x33) * (x47 + x63)) + ((uint64_t)(x19 + x32) * (x45 + x61)))) - (((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))));
+ uint64_t x67 = ((((uint64_t)(x13 + x29) * (x49 + x62)) + (((uint64_t)(x15 + x31) * (x47 + x63)) + (((uint64_t)(x17 + x33) * (x45 + x61)) + ((uint64_t)(x19 + x32) * (x43 + x59))))) - (((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))));
+ uint64_t x68 = ((((uint64_t)(x11 + x27) * (x49 + x62)) + (((uint64_t)(x13 + x29) * (x47 + x63)) + (((uint64_t)(x15 + x31) * (x45 + x61)) + (((uint64_t)(x17 + x33) * (x43 + x59)) + ((uint64_t)(x19 + x32) * (x41 + x57)))))) - (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))));
+ uint64_t x69 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x9 + x25) * (x49 + x62)) +ℤ (((uint64_t)(x11 + x27) * (x47 + x63)) + (((uint64_t)(x13 + x29) * (x45 + x61)) + (((uint64_t)(x15 + x31) * (x43 + x59)) + (((uint64_t)(x17 + x33) * (x41 + x57)) + ((uint64_t)(x19 + x32) * (x39 + x55))))))) -ℤ (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39))))))), (((((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + ((uint64_t)x19 * x55)))))) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ uint64_t x70 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x7 + x23) * (x49 + x62)) +ℤ (((uint64_t)(x9 + x25) * (x47 + x63)) +ℤ (((uint64_t)(x11 + x27) * (x45 + x61)) + (((uint64_t)(x13 + x29) * (x43 + x59)) + (((uint64_t)(x15 + x31) * (x41 + x57)) + (((uint64_t)(x17 + x33) * (x39 + x55)) + ((uint64_t)(x19 + x32) * (x37 + x53)))))))) -ℤ (((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37)))))))), (((((uint64_t)x7 * x62) + (((uint64_t)x9 * x63) + (((uint64_t)x11 * x61) + (((uint64_t)x13 * x59) + (((uint64_t)x15 * x57) + (((uint64_t)x17 * x55) + ((uint64_t)x19 * x53))))))) + (((uint64_t)x23 * x49) + (((uint64_t)x25 * x47) + (((uint64_t)x27 * x45) + (((uint64_t)x29 * x43) + (((uint64_t)x31 * x41) + (((uint64_t)x33 * x39) + ((uint64_t)x32 * x37)))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))));
+ ℤ x71 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)(x5 + x21) * (x49 + x62)) +ℤ (((uint64_t)(x7 + x23) * (x47 + x63)) +ℤ (((uint64_t)(x9 + x25) * (x45 + x61)) +ℤ (((uint64_t)(x11 + x27) * (x43 + x59)) + (((uint64_t)(x13 + x29) * (x41 + x57)) + (((uint64_t)(x15 + x31) * (x39 + x55)) + (((uint64_t)(x17 + x33) * (x37 + x53)) + ((uint64_t)(x19 + x32) * (x35 + x51))))))))) -ℤ (((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35))))))))), (((((uint64_t)x5 * x62) + (((uint64_t)x7 * x63) + (((uint64_t)x9 * x61) + (((uint64_t)x11 * x59) + (((uint64_t)x13 * x57) + (((uint64_t)x15 * x55) + (((uint64_t)x17 * x53) + ((uint64_t)x19 * x51)))))))) + (((uint64_t)x21 * x49) + (((uint64_t)x23 * x47) + (((uint64_t)x25 * x45) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + (((uint64_t)x31 * x39) + (((uint64_t)x33 * x37) + ((uint64_t)x32 * x35))))))))) +ℤ (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51))))))))));
+ uint64_t x72 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x5 + x21) * (x47 + x63)) +ℤ (((uint64_t)(x7 + x23) * (x45 + x61)) +ℤ (((uint64_t)(x9 + x25) * (x43 + x59)) + (((uint64_t)(x11 + x27) * (x41 + x57)) + (((uint64_t)(x13 + x29) * (x39 + x55)) + (((uint64_t)(x15 + x31) * (x37 + x53)) + ((uint64_t)(x17 + x33) * (x35 + x51)))))))) -ℤ (((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35)))))))), (((((uint64_t)x5 * x63) + (((uint64_t)x7 * x61) + (((uint64_t)x9 * x59) + (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + (((uint64_t)x15 * x53) + ((uint64_t)x17 * x51))))))) + (((uint64_t)x21 * x47) + (((uint64_t)x23 * x45) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + (((uint64_t)x31 * x37) + ((uint64_t)x33 * x35)))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))));
+ uint64_t x73 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x5 + x21) * (x45 + x61)) +ℤ (((uint64_t)(x7 + x23) * (x43 + x59)) + (((uint64_t)(x9 + x25) * (x41 + x57)) + (((uint64_t)(x11 + x27) * (x39 + x55)) + (((uint64_t)(x13 + x29) * (x37 + x53)) + ((uint64_t)(x15 + x31) * (x35 + x51))))))) -ℤ (((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35))))))), (((((uint64_t)x5 * x61) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + ((uint64_t)x15 * x51)))))) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x31 * x35))))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))));
+ uint64_t x74 = ((((uint64_t)(x5 + x21) * (x43 + x59)) + (((uint64_t)(x7 + x23) * (x41 + x57)) + (((uint64_t)(x9 + x25) * (x39 + x55)) + (((uint64_t)(x11 + x27) * (x37 + x53)) + ((uint64_t)(x13 + x29) * (x35 + x51)))))) - (((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))));
+ uint64_t x75 = ((((uint64_t)(x5 + x21) * (x41 + x57)) + (((uint64_t)(x7 + x23) * (x39 + x55)) + (((uint64_t)(x9 + x25) * (x37 + x53)) + ((uint64_t)(x11 + x27) * (x35 + x51))))) - (((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))));
+ uint64_t x76 = ((((uint64_t)(x5 + x21) * (x39 + x55)) + (((uint64_t)(x7 + x23) * (x37 + x53)) + ((uint64_t)(x9 + x25) * (x35 + x51)))) - (((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))));
+ uint64_t x77 = ((((uint64_t)(x5 + x21) * (x37 + x53)) + ((uint64_t)(x7 + x23) * (x35 + x51))) - (((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)));
+ uint64_t x78 = (((uint64_t)(x5 + x21) * (x35 + x51)) - ((uint64_t)x5 * x35));
+ ℤ x79 = (((((uint64_t)x19 * x49) + ((uint64_t)x32 * x62)) +ℤ x72) +ℤ x64);
+ ℤ x80 = ((((((uint64_t)x17 * x49) + ((uint64_t)x19 * x47)) + (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))) + x73) +ℤ x65);
+ ℤ x81 = ((((((uint64_t)x15 * x49) + (((uint64_t)x17 * x47) + ((uint64_t)x19 * x45))) + (((uint64_t)x31 * x62) + (((uint64_t)x33 * x63) + ((uint64_t)x32 * x61)))) + x74) +ℤ x66);
+ ℤ x82 = ((((((uint64_t)x13 * x49) + (((uint64_t)x15 * x47) + (((uint64_t)x17 * x45) + ((uint64_t)x19 * x43)))) + (((uint64_t)x29 * x62) + (((uint64_t)x31 * x63) + (((uint64_t)x33 * x61) + ((uint64_t)x32 * x59))))) + x75) +ℤ x67);
+ ℤ x83 = ((((((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + (((uint64_t)x15 * x45) + (((uint64_t)x17 * x43) + ((uint64_t)x19 * x41))))) + (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + (((uint64_t)x31 * x61) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))) + x76) +ℤ x68);
+ ℤ x84 = ((((((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + ((uint64_t)x19 * x39)))))) + (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))) + x77) +ℤ x69);
+ ℤ x85 = ((((((uint64_t)x7 * x49) + (((uint64_t)x9 * x47) + (((uint64_t)x11 * x45) + (((uint64_t)x13 * x43) + (((uint64_t)x15 * x41) + (((uint64_t)x17 * x39) + ((uint64_t)x19 * x37))))))) + (((uint64_t)x23 * x62) + (((uint64_t)x25 * x63) + (((uint64_t)x27 * x61) + (((uint64_t)x29 * x59) + (((uint64_t)x31 * x57) + (((uint64_t)x33 * x55) + ((uint64_t)x32 * x53)))))))) + x78) +ℤ x70);
+ uint64_t x86 = ((((uint64_t)x5 * x49) + (((uint64_t)x7 * x47) + (((uint64_t)x9 * x45) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + (((uint64_t)x15 * x39) + (((uint64_t)x17 * x37) + ((uint64_t)x19 * x35)))))))) + (((uint64_t)x21 * x62) + (((uint64_t)x23 * x63) + (((uint64_t)x25 * x61) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + (((uint64_t)x31 * x55) + (((uint64_t)x33 * x53) + ((uint64_t)x32 * x51)))))))));
+ uint64_t x87 = (((((uint64_t)x5 * x47) + (((uint64_t)x7 * x45) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + (((uint64_t)x15 * x37) + ((uint64_t)x17 * x35))))))) + (((uint64_t)x21 * x63) + (((uint64_t)x23 * x61) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + (((uint64_t)x31 * x53) + ((uint64_t)x33 * x51)))))))) + x64);
+ uint64_t x88 = (((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + ((uint64_t)x31 * x51))))))) + x65);
+ uint64_t x89 = (((((uint64_t)x5 * x43) + (((uint64_t)x7 * x41) + (((uint64_t)x9 * x39) + (((uint64_t)x11 * x37) + ((uint64_t)x13 * x35))))) + (((uint64_t)x21 * x59) + (((uint64_t)x23 * x57) + (((uint64_t)x25 * x55) + (((uint64_t)x27 * x53) + ((uint64_t)x29 * x51)))))) + x66);
+ uint64_t x90 = (((((uint64_t)x5 * x41) + (((uint64_t)x7 * x39) + (((uint64_t)x9 * x37) + ((uint64_t)x11 * x35)))) + (((uint64_t)x21 * x57) + (((uint64_t)x23 * x55) + (((uint64_t)x25 * x53) + ((uint64_t)x27 * x51))))) + x67);
+ uint64_t x91 = (((((uint64_t)x5 * x39) + (((uint64_t)x7 * x37) + ((uint64_t)x9 * x35))) + (((uint64_t)x21 * x55) + (((uint64_t)x23 * x53) + ((uint64_t)x25 * x51)))) + x68);
+ uint64_t x92 = (((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) + (((uint64_t)x21 * x53) + ((uint64_t)x23 * x51))) + x69);
+ ℤ x93 = ((((uint64_t)x5 * x35) + ((uint64_t)x21 * x51)) +ℤ x70);
+ uint64_t x94 = (x86 >> 0x1c);
+ uint32_t x95 = ((uint32_t)x86 & 0xfffffff);
+ uint64_t x96 = (x71 >> 0x1c);
+ uint32_t x97 = (x71 & 0xfffffff);
+ ℤ x98 = ((0x10000000 *ℤ x96) +ℤ x97);
+ uint64_t x99 = (x98 >> 0x1c);
+ uint32_t x100 = (x98 & 0xfffffff);
+ ℤ x101 = ((x94 +ℤ x85) +ℤ x99);
+ uint64_t x102 = (x101 >> 0x1c);
+ uint32_t x103 = (x101 & 0xfffffff);
+ ℤ x104 = (x93 +ℤ x99);
+ uint64_t x105 = (x104 >> 0x1c);
+ uint32_t x106 = (x104 & 0xfffffff);
+ ℤ x107 = (x102 +ℤ x84);
+ uint64_t x108 = (x107 >> 0x1c);
+ uint32_t x109 = (x107 & 0xfffffff);
+ uint64_t x110 = (x105 + x92);
+ uint64_t x111 = (x110 >> 0x1c);
+ uint32_t x112 = ((uint32_t)x110 & 0xfffffff);
+ ℤ x113 = (x108 +ℤ x83);
+ uint64_t x114 = (x113 >> 0x1c);
+ uint32_t x115 = (x113 & 0xfffffff);
+ uint64_t x116 = (x111 + x91);
+ uint64_t x117 = (x116 >> 0x1c);
+ uint32_t x118 = ((uint32_t)x116 & 0xfffffff);
+ ℤ x119 = (x114 +ℤ x82);
+ uint64_t x120 = (x119 >> 0x1c);
+ uint32_t x121 = (x119 & 0xfffffff);
+ uint64_t x122 = (x117 + x90);
+ uint64_t x123 = (x122 >> 0x1c);
+ uint32_t x124 = ((uint32_t)x122 & 0xfffffff);
+ ℤ x125 = (x120 +ℤ x81);
+ uint64_t x126 = (x125 >> 0x1c);
+ uint32_t x127 = (x125 & 0xfffffff);
+ uint64_t x128 = (x123 + x89);
+ uint64_t x129 = (x128 >> 0x1c);
+ uint32_t x130 = ((uint32_t)x128 & 0xfffffff);
+ ℤ x131 = (x126 +ℤ x80);
+ uint64_t x132 = (x131 >> 0x1c);
+ uint32_t x133 = (x131 & 0xfffffff);
+ uint64_t x134 = (x129 + x88);
+ uint64_t x135 = (x134 >> 0x1c);
+ uint32_t x136 = ((uint32_t)x134 & 0xfffffff);
+ ℤ x137 = (x132 +ℤ x79);
+ uint64_t x138 = (x137 >> 0x1c);
+ uint32_t x139 = (x137 & 0xfffffff);
+ uint64_t x140 = (x135 + x87);
+ uint64_t x141 = (x140 >> 0x1c);
+ uint32_t x142 = ((uint32_t)x140 & 0xfffffff);
+ uint64_t x143 = (x138 + x100);
+ uint32_t x144 = (uint32_t) (x143 >> 0x1c);
+ uint32_t x145 = ((uint32_t)x143 & 0xfffffff);
+ uint64_t x146 = (x141 + x95);
+ uint32_t x147 = (uint32_t) (x146 >> 0x1c);
+ uint32_t x148 = ((uint32_t)x146 & 0xfffffff);
+ uint64_t x149 = (((uint64_t)0x10000000 * x144) + x145);
+ uint32_t x150 = (uint32_t) (x149 >> 0x1c);
+ uint32_t x151 = ((uint32_t)x149 & 0xfffffff);
+ uint32_t x152 = ((x147 + x103) + x150);
+ uint32_t x153 = (x152 >> 0x1c);
+ uint32_t x154 = (x152 & 0xfffffff);
+ uint32_t x155 = (x106 + x150);
+ uint32_t x156 = (x155 >> 0x1c);
+ uint32_t x157 = (x155 & 0xfffffff);
+ return (Return x151, Return x139, Return x133, Return x127, Return x121, Return x115, (x153 + x109), Return x154, Return x148, Return x142, Return x136, Return x130, Return x124, Return x118, (x156 + x112), Return x157))
+(x, x0)%core
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e448m2e224m1/fesquare.c b/src/Specific/solinas32_2e448m2e224m1/fesquare.c
new file mode 100644
index 000000000..26103564e
--- /dev/null
+++ b/src/Specific/solinas32_2e448m2e224m1/fesquare.c
@@ -0,0 +1,131 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint64_t x31 = (((uint64_t)(x16 + x29) * (x16 + x29)) - ((uint64_t)x16 * x16));
+{ uint64_t x32 = ((((uint64_t)(x14 + x30) * (x16 + x29)) + ((uint64_t)(x16 + x29) * (x14 + x30))) - (((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)));
+{ uint64_t x33 = ((((uint64_t)(x12 + x28) * (x16 + x29)) + (((uint64_t)(x14 + x30) * (x14 + x30)) + ((uint64_t)(x16 + x29) * (x12 + x28)))) - (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))));
+{ uint64_t x34 = ((((uint64_t)(x10 + x26) * (x16 + x29)) + (((uint64_t)(x12 + x28) * (x14 + x30)) + (((uint64_t)(x14 + x30) * (x12 + x28)) + ((uint64_t)(x16 + x29) * (x10 + x26))))) - (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))));
+{ uint64_t x35 = ((((uint64_t)(x8 + x24) * (x16 + x29)) + (((uint64_t)(x10 + x26) * (x14 + x30)) + (((uint64_t)(x12 + x28) * (x12 + x28)) + (((uint64_t)(x14 + x30) * (x10 + x26)) + ((uint64_t)(x16 + x29) * (x8 + x24)))))) - (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))));
+{ uint64_t x36 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x6 + x22) * (x16 + x29)) +ℤ (((uint64_t)(x8 + x24) * (x14 + x30)) + (((uint64_t)(x10 + x26) * (x12 + x28)) + (((uint64_t)(x12 + x28) * (x10 + x26)) + (((uint64_t)(x14 + x30) * (x8 + x24)) + ((uint64_t)(x16 + x29) * (x6 + x22))))))) -ℤ (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6))))))), (((((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + ((uint64_t)x16 * x22)))))) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+{ uint64_t x37 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x4 + x20) * (x16 + x29)) +ℤ (((uint64_t)(x6 + x22) * (x14 + x30)) +ℤ (((uint64_t)(x8 + x24) * (x12 + x28)) + (((uint64_t)(x10 + x26) * (x10 + x26)) + (((uint64_t)(x12 + x28) * (x8 + x24)) + (((uint64_t)(x14 + x30) * (x6 + x22)) + ((uint64_t)(x16 + x29) * (x4 + x20)))))))) -ℤ (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4)))))))), (((((uint64_t)x4 * x29) + (((uint64_t)x6 * x30) + (((uint64_t)x8 * x28) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + ((uint64_t)x16 * x20))))))) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + (((uint64_t)x28 * x8) + (((uint64_t)x30 * x6) + ((uint64_t)x29 * x4)))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+{ ℤ x38 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)(x2 + x18) * (x16 + x29)) +ℤ (((uint64_t)(x4 + x20) * (x14 + x30)) +ℤ (((uint64_t)(x6 + x22) * (x12 + x28)) +ℤ (((uint64_t)(x8 + x24) * (x10 + x26)) + (((uint64_t)(x10 + x26) * (x8 + x24)) + (((uint64_t)(x12 + x28) * (x6 + x22)) + (((uint64_t)(x14 + x30) * (x4 + x20)) + ((uint64_t)(x16 + x29) * (x2 + x18))))))))) -ℤ (((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2))))))))), (((((uint64_t)x2 * x29) + (((uint64_t)x4 * x30) + (((uint64_t)x6 * x28) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((uint64_t)x16 * x18)))))))) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + (((uint64_t)x28 * x6) + (((uint64_t)x30 * x4) + ((uint64_t)x29 * x2))))))))) +ℤ (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+{ uint64_t x39 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x2 + x18) * (x14 + x30)) +ℤ (((uint64_t)(x4 + x20) * (x12 + x28)) +ℤ (((uint64_t)(x6 + x22) * (x10 + x26)) + (((uint64_t)(x8 + x24) * (x8 + x24)) + (((uint64_t)(x10 + x26) * (x6 + x22)) + (((uint64_t)(x12 + x28) * (x4 + x20)) + ((uint64_t)(x14 + x30) * (x2 + x18)))))))) -ℤ (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2)))))))), (((((uint64_t)x2 * x30) + (((uint64_t)x4 * x28) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + ((uint64_t)x14 * x18))))))) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + (((uint64_t)x28 * x4) + ((uint64_t)x30 * x2)))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))));
+{ uint64_t x40 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x2 + x18) * (x12 + x28)) +ℤ (((uint64_t)(x4 + x20) * (x10 + x26)) + (((uint64_t)(x6 + x22) * (x8 + x24)) + (((uint64_t)(x8 + x24) * (x6 + x22)) + (((uint64_t)(x10 + x26) * (x4 + x20)) + ((uint64_t)(x12 + x28) * (x2 + x18))))))) -ℤ (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2))))))), (((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + ((uint64_t)x12 * x18)))))) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2))))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))));
+{ uint64_t x41 = ((((uint64_t)(x2 + x18) * (x10 + x26)) + (((uint64_t)(x4 + x20) * (x8 + x24)) + (((uint64_t)(x6 + x22) * (x6 + x22)) + (((uint64_t)(x8 + x24) * (x4 + x20)) + ((uint64_t)(x10 + x26) * (x2 + x18)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
+{ uint64_t x42 = ((((uint64_t)(x2 + x18) * (x8 + x24)) + (((uint64_t)(x4 + x20) * (x6 + x22)) + (((uint64_t)(x6 + x22) * (x4 + x20)) + ((uint64_t)(x8 + x24) * (x2 + x18))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+{ uint64_t x43 = ((((uint64_t)(x2 + x18) * (x6 + x22)) + (((uint64_t)(x4 + x20) * (x4 + x20)) + ((uint64_t)(x6 + x22) * (x2 + x18)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+{ uint64_t x44 = ((((uint64_t)(x2 + x18) * (x4 + x20)) + ((uint64_t)(x4 + x20) * (x2 + x18))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+{ uint64_t x45 = (((uint64_t)(x2 + x18) * (x2 + x18)) - ((uint64_t)x2 * x2));
+{ ℤ x46 = (((((uint64_t)x16 * x16) + ((uint64_t)x29 * x29)) +ℤ x39) +ℤ x31);
+{ ℤ x47 = ((((((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)) + (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))) + x40) +ℤ x32);
+{ ℤ x48 = ((((((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))) + (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))) + x41) +ℤ x33);
+{ ℤ x49 = ((((((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))) + (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))) + x42) +ℤ x34);
+{ ℤ x50 = ((((((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))) + (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))) + x43) +ℤ x35);
+{ ℤ x51 = ((((((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))) + x44) +ℤ x36);
+{ ℤ x52 = ((((((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))) + x45) +ℤ x37);
+{ uint64_t x53 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18)))))))));
+{ uint64_t x54 = (((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))) + x31);
+{ uint64_t x55 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))) + x32);
+{ uint64_t x56 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x26 * x18)))))) + x33);
+{ uint64_t x57 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x24 * x18))))) + x34);
+{ uint64_t x58 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + ((uint64_t)x22 * x18)))) + x35);
+{ uint64_t x59 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x18 * x20) + ((uint64_t)x20 * x18))) + x36);
+{ ℤ x60 = ((((uint64_t)x2 * x2) + ((uint64_t)x18 * x18)) +ℤ x37);
+{ uint64_t x61 = (x53 >> 0x1c);
+{ uint32_t x62 = ((uint32_t)x53 & 0xfffffff);
+{ uint64_t x63 = (x38 >> 0x1c);
+{ uint32_t x64 = (x38 & 0xfffffff);
+{ ℤ x65 = ((0x10000000 *ℤ x63) +ℤ x64);
+{ uint64_t x66 = (x65 >> 0x1c);
+{ uint32_t x67 = (x65 & 0xfffffff);
+{ ℤ x68 = ((x61 +ℤ x52) +ℤ x66);
+{ uint64_t x69 = (x68 >> 0x1c);
+{ uint32_t x70 = (x68 & 0xfffffff);
+{ ℤ x71 = (x60 +ℤ x66);
+{ uint64_t x72 = (x71 >> 0x1c);
+{ uint32_t x73 = (x71 & 0xfffffff);
+{ ℤ x74 = (x69 +ℤ x51);
+{ uint64_t x75 = (x74 >> 0x1c);
+{ uint32_t x76 = (x74 & 0xfffffff);
+{ uint64_t x77 = (x72 + x59);
+{ uint64_t x78 = (x77 >> 0x1c);
+{ uint32_t x79 = ((uint32_t)x77 & 0xfffffff);
+{ ℤ x80 = (x75 +ℤ x50);
+{ uint64_t x81 = (x80 >> 0x1c);
+{ uint32_t x82 = (x80 & 0xfffffff);
+{ uint64_t x83 = (x78 + x58);
+{ uint64_t x84 = (x83 >> 0x1c);
+{ uint32_t x85 = ((uint32_t)x83 & 0xfffffff);
+{ ℤ x86 = (x81 +ℤ x49);
+{ uint64_t x87 = (x86 >> 0x1c);
+{ uint32_t x88 = (x86 & 0xfffffff);
+{ uint64_t x89 = (x84 + x57);
+{ uint64_t x90 = (x89 >> 0x1c);
+{ uint32_t x91 = ((uint32_t)x89 & 0xfffffff);
+{ ℤ x92 = (x87 +ℤ x48);
+{ uint64_t x93 = (x92 >> 0x1c);
+{ uint32_t x94 = (x92 & 0xfffffff);
+{ uint64_t x95 = (x90 + x56);
+{ uint64_t x96 = (x95 >> 0x1c);
+{ uint32_t x97 = ((uint32_t)x95 & 0xfffffff);
+{ ℤ x98 = (x93 +ℤ x47);
+{ uint64_t x99 = (x98 >> 0x1c);
+{ uint32_t x100 = (x98 & 0xfffffff);
+{ uint64_t x101 = (x96 + x55);
+{ uint64_t x102 = (x101 >> 0x1c);
+{ uint32_t x103 = ((uint32_t)x101 & 0xfffffff);
+{ ℤ x104 = (x99 +ℤ x46);
+{ uint64_t x105 = (x104 >> 0x1c);
+{ uint32_t x106 = (x104 & 0xfffffff);
+{ uint64_t x107 = (x102 + x54);
+{ uint64_t x108 = (x107 >> 0x1c);
+{ uint32_t x109 = ((uint32_t)x107 & 0xfffffff);
+{ uint64_t x110 = (x105 + x67);
+{ uint32_t x111 = (uint32_t) (x110 >> 0x1c);
+{ uint32_t x112 = ((uint32_t)x110 & 0xfffffff);
+{ uint64_t x113 = (x108 + x62);
+{ uint32_t x114 = (uint32_t) (x113 >> 0x1c);
+{ uint32_t x115 = ((uint32_t)x113 & 0xfffffff);
+{ uint64_t x116 = (((uint64_t)0x10000000 * x111) + x112);
+{ uint32_t x117 = (uint32_t) (x116 >> 0x1c);
+{ uint32_t x118 = ((uint32_t)x116 & 0xfffffff);
+{ uint32_t x119 = ((x114 + x70) + x117);
+{ uint32_t x120 = (x119 >> 0x1c);
+{ uint32_t x121 = (x119 & 0xfffffff);
+{ uint32_t x122 = (x73 + x117);
+{ uint32_t x123 = (x122 >> 0x1c);
+{ uint32_t x124 = (x122 & 0xfffffff);
+out[0] = x118;
+out[1] = x106;
+out[2] = x100;
+out[3] = x94;
+out[4] = x88;
+out[5] = x82;
+out[6] = x120 + x76;
+out[7] = x121;
+out[8] = x115;
+out[9] = x109;
+out[10] = x103;
+out[11] = x97;
+out[12] = x91;
+out[13] = x85;
+out[14] = x123 + x79;
+out[15] = x124;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e448m2e224m1/fesquare.h b/src/Specific/solinas32_2e448m2e224m1/fesquare.h
new file mode 100644
index 000000000..c86247b3d
--- /dev/null
+++ b/src/Specific/solinas32_2e448m2e224m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e448m2e224m1/fesquareDisplay.log b/src/Specific/solinas32_2e448m2e224m1/fesquareDisplay.log
new file mode 100644
index 000000000..e1ffcf26b
--- /dev/null
+++ b/src/Specific/solinas32_2e448m2e224m1/fesquareDisplay.log
@@ -0,0 +1,101 @@
+λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ uint64_t x31 = (((uint64_t)(x16 + x29) * (x16 + x29)) - ((uint64_t)x16 * x16));
+ uint64_t x32 = ((((uint64_t)(x14 + x30) * (x16 + x29)) + ((uint64_t)(x16 + x29) * (x14 + x30))) - (((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)));
+ uint64_t x33 = ((((uint64_t)(x12 + x28) * (x16 + x29)) + (((uint64_t)(x14 + x30) * (x14 + x30)) + ((uint64_t)(x16 + x29) * (x12 + x28)))) - (((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))));
+ uint64_t x34 = ((((uint64_t)(x10 + x26) * (x16 + x29)) + (((uint64_t)(x12 + x28) * (x14 + x30)) + (((uint64_t)(x14 + x30) * (x12 + x28)) + ((uint64_t)(x16 + x29) * (x10 + x26))))) - (((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))));
+ uint64_t x35 = ((((uint64_t)(x8 + x24) * (x16 + x29)) + (((uint64_t)(x10 + x26) * (x14 + x30)) + (((uint64_t)(x12 + x28) * (x12 + x28)) + (((uint64_t)(x14 + x30) * (x10 + x26)) + ((uint64_t)(x16 + x29) * (x8 + x24)))))) - (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))));
+ uint64_t x36 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x6 + x22) * (x16 + x29)) +ℤ (((uint64_t)(x8 + x24) * (x14 + x30)) + (((uint64_t)(x10 + x26) * (x12 + x28)) + (((uint64_t)(x12 + x28) * (x10 + x26)) + (((uint64_t)(x14 + x30) * (x8 + x24)) + ((uint64_t)(x16 + x29) * (x6 + x22))))))) -ℤ (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6))))))), (((((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + ((uint64_t)x16 * x22)))))) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ uint64_t x37 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x4 + x20) * (x16 + x29)) +ℤ (((uint64_t)(x6 + x22) * (x14 + x30)) +ℤ (((uint64_t)(x8 + x24) * (x12 + x28)) + (((uint64_t)(x10 + x26) * (x10 + x26)) + (((uint64_t)(x12 + x28) * (x8 + x24)) + (((uint64_t)(x14 + x30) * (x6 + x22)) + ((uint64_t)(x16 + x29) * (x4 + x20)))))))) -ℤ (((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4)))))))), (((((uint64_t)x4 * x29) + (((uint64_t)x6 * x30) + (((uint64_t)x8 * x28) + (((uint64_t)x10 * x26) + (((uint64_t)x12 * x24) + (((uint64_t)x14 * x22) + ((uint64_t)x16 * x20))))))) + (((uint64_t)x20 * x16) + (((uint64_t)x22 * x14) + (((uint64_t)x24 * x12) + (((uint64_t)x26 * x10) + (((uint64_t)x28 * x8) + (((uint64_t)x30 * x6) + ((uint64_t)x29 * x4)))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))));
+ ℤ x38 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)(x2 + x18) * (x16 + x29)) +ℤ (((uint64_t)(x4 + x20) * (x14 + x30)) +ℤ (((uint64_t)(x6 + x22) * (x12 + x28)) +ℤ (((uint64_t)(x8 + x24) * (x10 + x26)) + (((uint64_t)(x10 + x26) * (x8 + x24)) + (((uint64_t)(x12 + x28) * (x6 + x22)) + (((uint64_t)(x14 + x30) * (x4 + x20)) + ((uint64_t)(x16 + x29) * (x2 + x18))))))))) -ℤ (((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2))))))))), (((((uint64_t)x2 * x29) + (((uint64_t)x4 * x30) + (((uint64_t)x6 * x28) + (((uint64_t)x8 * x26) + (((uint64_t)x10 * x24) + (((uint64_t)x12 * x22) + (((uint64_t)x14 * x20) + ((uint64_t)x16 * x18)))))))) + (((uint64_t)x18 * x16) + (((uint64_t)x20 * x14) + (((uint64_t)x22 * x12) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + (((uint64_t)x28 * x6) + (((uint64_t)x30 * x4) + ((uint64_t)x29 * x2))))))))) +ℤ (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18))))))))));
+ uint64_t x39 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x2 + x18) * (x14 + x30)) +ℤ (((uint64_t)(x4 + x20) * (x12 + x28)) +ℤ (((uint64_t)(x6 + x22) * (x10 + x26)) + (((uint64_t)(x8 + x24) * (x8 + x24)) + (((uint64_t)(x10 + x26) * (x6 + x22)) + (((uint64_t)(x12 + x28) * (x4 + x20)) + ((uint64_t)(x14 + x30) * (x2 + x18)))))))) -ℤ (((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2)))))))), (((((uint64_t)x2 * x30) + (((uint64_t)x4 * x28) + (((uint64_t)x6 * x26) + (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + (((uint64_t)x12 * x20) + ((uint64_t)x14 * x18))))))) + (((uint64_t)x18 * x14) + (((uint64_t)x20 * x12) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + (((uint64_t)x28 * x4) + ((uint64_t)x30 * x2)))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))));
+ uint64_t x40 = Op (Syntax.IdWithAlt Syntax.TZ (Syntax.TWord 6) (Syntax.TWord 6)) (((((uint64_t)(x2 + x18) * (x12 + x28)) +ℤ (((uint64_t)(x4 + x20) * (x10 + x26)) + (((uint64_t)(x6 + x22) * (x8 + x24)) + (((uint64_t)(x8 + x24) * (x6 + x22)) + (((uint64_t)(x10 + x26) * (x4 + x20)) + ((uint64_t)(x12 + x28) * (x2 + x18))))))) -ℤ (((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2))))))), (((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + ((uint64_t)x12 * x18)))))) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2))))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))));
+ uint64_t x41 = ((((uint64_t)(x2 + x18) * (x10 + x26)) + (((uint64_t)(x4 + x20) * (x8 + x24)) + (((uint64_t)(x6 + x22) * (x6 + x22)) + (((uint64_t)(x8 + x24) * (x4 + x20)) + ((uint64_t)(x10 + x26) * (x2 + x18)))))) - (((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))));
+ uint64_t x42 = ((((uint64_t)(x2 + x18) * (x8 + x24)) + (((uint64_t)(x4 + x20) * (x6 + x22)) + (((uint64_t)(x6 + x22) * (x4 + x20)) + ((uint64_t)(x8 + x24) * (x2 + x18))))) - (((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))));
+ uint64_t x43 = ((((uint64_t)(x2 + x18) * (x6 + x22)) + (((uint64_t)(x4 + x20) * (x4 + x20)) + ((uint64_t)(x6 + x22) * (x2 + x18)))) - (((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))));
+ uint64_t x44 = ((((uint64_t)(x2 + x18) * (x4 + x20)) + ((uint64_t)(x4 + x20) * (x2 + x18))) - (((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)));
+ uint64_t x45 = (((uint64_t)(x2 + x18) * (x2 + x18)) - ((uint64_t)x2 * x2));
+ ℤ x46 = (((((uint64_t)x16 * x16) + ((uint64_t)x29 * x29)) +ℤ x39) +ℤ x31);
+ ℤ x47 = ((((((uint64_t)x14 * x16) + ((uint64_t)x16 * x14)) + (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))) + x40) +ℤ x32);
+ ℤ x48 = ((((((uint64_t)x12 * x16) + (((uint64_t)x14 * x14) + ((uint64_t)x16 * x12))) + (((uint64_t)x28 * x29) + (((uint64_t)x30 * x30) + ((uint64_t)x29 * x28)))) + x41) +ℤ x33);
+ ℤ x49 = ((((((uint64_t)x10 * x16) + (((uint64_t)x12 * x14) + (((uint64_t)x14 * x12) + ((uint64_t)x16 * x10)))) + (((uint64_t)x26 * x29) + (((uint64_t)x28 * x30) + (((uint64_t)x30 * x28) + ((uint64_t)x29 * x26))))) + x42) +ℤ x34);
+ ℤ x50 = ((((((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + (((uint64_t)x12 * x12) + (((uint64_t)x14 * x10) + ((uint64_t)x16 * x8))))) + (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + (((uint64_t)x28 * x28) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))) + x43) +ℤ x35);
+ ℤ x51 = ((((((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + ((uint64_t)x16 * x6)))))) + (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))) + x44) +ℤ x36);
+ ℤ x52 = ((((((uint64_t)x4 * x16) + (((uint64_t)x6 * x14) + (((uint64_t)x8 * x12) + (((uint64_t)x10 * x10) + (((uint64_t)x12 * x8) + (((uint64_t)x14 * x6) + ((uint64_t)x16 * x4))))))) + (((uint64_t)x20 * x29) + (((uint64_t)x22 * x30) + (((uint64_t)x24 * x28) + (((uint64_t)x26 * x26) + (((uint64_t)x28 * x24) + (((uint64_t)x30 * x22) + ((uint64_t)x29 * x20)))))))) + x45) +ℤ x37);
+ uint64_t x53 = ((((uint64_t)x2 * x16) + (((uint64_t)x4 * x14) + (((uint64_t)x6 * x12) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + (((uint64_t)x12 * x6) + (((uint64_t)x14 * x4) + ((uint64_t)x16 * x2)))))))) + (((uint64_t)x18 * x29) + (((uint64_t)x20 * x30) + (((uint64_t)x22 * x28) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + (((uint64_t)x28 * x22) + (((uint64_t)x30 * x20) + ((uint64_t)x29 * x18)))))))));
+ uint64_t x54 = (((((uint64_t)x2 * x14) + (((uint64_t)x4 * x12) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + (((uint64_t)x12 * x4) + ((uint64_t)x14 * x2))))))) + (((uint64_t)x18 * x30) + (((uint64_t)x20 * x28) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + (((uint64_t)x28 * x20) + ((uint64_t)x30 * x18)))))))) + x31);
+ uint64_t x55 = (((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + ((uint64_t)x28 * x18))))))) + x32);
+ uint64_t x56 = (((((uint64_t)x2 * x10) + (((uint64_t)x4 * x8) + (((uint64_t)x6 * x6) + (((uint64_t)x8 * x4) + ((uint64_t)x10 * x2))))) + (((uint64_t)x18 * x26) + (((uint64_t)x20 * x24) + (((uint64_t)x22 * x22) + (((uint64_t)x24 * x20) + ((uint64_t)x26 * x18)))))) + x33);
+ uint64_t x57 = (((((uint64_t)x2 * x8) + (((uint64_t)x4 * x6) + (((uint64_t)x6 * x4) + ((uint64_t)x8 * x2)))) + (((uint64_t)x18 * x24) + (((uint64_t)x20 * x22) + (((uint64_t)x22 * x20) + ((uint64_t)x24 * x18))))) + x34);
+ uint64_t x58 = (((((uint64_t)x2 * x6) + (((uint64_t)x4 * x4) + ((uint64_t)x6 * x2))) + (((uint64_t)x18 * x22) + (((uint64_t)x20 * x20) + ((uint64_t)x22 * x18)))) + x35);
+ uint64_t x59 = (((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) + (((uint64_t)x18 * x20) + ((uint64_t)x20 * x18))) + x36);
+ ℤ x60 = ((((uint64_t)x2 * x2) + ((uint64_t)x18 * x18)) +ℤ x37);
+ uint64_t x61 = (x53 >> 0x1c);
+ uint32_t x62 = ((uint32_t)x53 & 0xfffffff);
+ uint64_t x63 = (x38 >> 0x1c);
+ uint32_t x64 = (x38 & 0xfffffff);
+ ℤ x65 = ((0x10000000 *ℤ x63) +ℤ x64);
+ uint64_t x66 = (x65 >> 0x1c);
+ uint32_t x67 = (x65 & 0xfffffff);
+ ℤ x68 = ((x61 +ℤ x52) +ℤ x66);
+ uint64_t x69 = (x68 >> 0x1c);
+ uint32_t x70 = (x68 & 0xfffffff);
+ ℤ x71 = (x60 +ℤ x66);
+ uint64_t x72 = (x71 >> 0x1c);
+ uint32_t x73 = (x71 & 0xfffffff);
+ ℤ x74 = (x69 +ℤ x51);
+ uint64_t x75 = (x74 >> 0x1c);
+ uint32_t x76 = (x74 & 0xfffffff);
+ uint64_t x77 = (x72 + x59);
+ uint64_t x78 = (x77 >> 0x1c);
+ uint32_t x79 = ((uint32_t)x77 & 0xfffffff);
+ ℤ x80 = (x75 +ℤ x50);
+ uint64_t x81 = (x80 >> 0x1c);
+ uint32_t x82 = (x80 & 0xfffffff);
+ uint64_t x83 = (x78 + x58);
+ uint64_t x84 = (x83 >> 0x1c);
+ uint32_t x85 = ((uint32_t)x83 & 0xfffffff);
+ ℤ x86 = (x81 +ℤ x49);
+ uint64_t x87 = (x86 >> 0x1c);
+ uint32_t x88 = (x86 & 0xfffffff);
+ uint64_t x89 = (x84 + x57);
+ uint64_t x90 = (x89 >> 0x1c);
+ uint32_t x91 = ((uint32_t)x89 & 0xfffffff);
+ ℤ x92 = (x87 +ℤ x48);
+ uint64_t x93 = (x92 >> 0x1c);
+ uint32_t x94 = (x92 & 0xfffffff);
+ uint64_t x95 = (x90 + x56);
+ uint64_t x96 = (x95 >> 0x1c);
+ uint32_t x97 = ((uint32_t)x95 & 0xfffffff);
+ ℤ x98 = (x93 +ℤ x47);
+ uint64_t x99 = (x98 >> 0x1c);
+ uint32_t x100 = (x98 & 0xfffffff);
+ uint64_t x101 = (x96 + x55);
+ uint64_t x102 = (x101 >> 0x1c);
+ uint32_t x103 = ((uint32_t)x101 & 0xfffffff);
+ ℤ x104 = (x99 +ℤ x46);
+ uint64_t x105 = (x104 >> 0x1c);
+ uint32_t x106 = (x104 & 0xfffffff);
+ uint64_t x107 = (x102 + x54);
+ uint64_t x108 = (x107 >> 0x1c);
+ uint32_t x109 = ((uint32_t)x107 & 0xfffffff);
+ uint64_t x110 = (x105 + x67);
+ uint32_t x111 = (uint32_t) (x110 >> 0x1c);
+ uint32_t x112 = ((uint32_t)x110 & 0xfffffff);
+ uint64_t x113 = (x108 + x62);
+ uint32_t x114 = (uint32_t) (x113 >> 0x1c);
+ uint32_t x115 = ((uint32_t)x113 & 0xfffffff);
+ uint64_t x116 = (((uint64_t)0x10000000 * x111) + x112);
+ uint32_t x117 = (uint32_t) (x116 >> 0x1c);
+ uint32_t x118 = ((uint32_t)x116 & 0xfffffff);
+ uint32_t x119 = ((x114 + x70) + x117);
+ uint32_t x120 = (x119 >> 0x1c);
+ uint32_t x121 = (x119 & 0xfffffff);
+ uint32_t x122 = (x73 + x117);
+ uint32_t x123 = (x122 >> 0x1c);
+ uint32_t x124 = (x122 & 0xfffffff);
+ return (Return x118, Return x106, Return x100, Return x94, Return x88, Return x82, (x120 + x76), Return x121, Return x115, Return x109, Return x103, Return x97, Return x91, Return x85, (x123 + x79), Return x124))
+x
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e448m2e224m1/freeze.c b/src/Specific/solinas32_2e448m2e224m1/freeze.c
new file mode 100644
index 000000000..40dcf60a9
--- /dev/null
+++ b/src/Specific/solinas32_2e448m2e224m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x32;
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 28 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xfffffff;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e448m2e224m1/freeze.h b/src/Specific/solinas32_2e448m2e224m1/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e448m2e224m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e448m2e224m1/freezeDisplay.log b/src/Specific/solinas32_2e448m2e224m1/freezeDisplay.log
index 6c349a76b..25a7c1346 100644
--- a/src/Specific/solinas32_2e448m2e224m1/freezeDisplay.log
+++ b/src/Specific/solinas32_2e448m2e224m1/freezeDisplay.log
@@ -19,37 +19,37 @@ Interp-η
uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xfffffff);
uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xfffffff);
uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
- uint32_t x80 = x79 & 0xfffffff;
+ uint32_t x80 = (x79 & 0xfffffff);
uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
- uint32_t x84 = x79 & 0xfffffff;
+ uint32_t x84 = (x79 & 0xfffffff);
uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
- uint32_t x88 = x79 & 0xfffffff;
+ uint32_t x88 = (x79 & 0xfffffff);
uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
- uint32_t x92 = x79 & 0xfffffff;
+ uint32_t x92 = (x79 & 0xfffffff);
uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
- uint32_t x96 = x79 & 0xfffffff;
+ uint32_t x96 = (x79 & 0xfffffff);
uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
- uint32_t x100 = x79 & 0xfffffff;
+ uint32_t x100 = (x79 & 0xfffffff);
uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
- uint32_t x104 = x79 & 0xfffffff;
+ uint32_t x104 = (x79 & 0xfffffff);
uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
- uint32_t x108 = x79 & 0xfffffff;
+ uint32_t x108 = (x79 & 0xfffffff);
uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
- uint32_t x112 = x79 & 0xffffffe;
+ uint32_t x112 = (x79 & 0xffffffe);
uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
- uint32_t x116 = x79 & 0xfffffff;
+ uint32_t x116 = (x79 & 0xfffffff);
uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
- uint32_t x120 = x79 & 0xfffffff;
+ uint32_t x120 = (x79 & 0xfffffff);
uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
- uint32_t x124 = x79 & 0xfffffff;
+ uint32_t x124 = (x79 & 0xfffffff);
uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
- uint32_t x128 = x79 & 0xfffffff;
+ uint32_t x128 = (x79 & 0xfffffff);
uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
- uint32_t x132 = x79 & 0xfffffff;
+ uint32_t x132 = (x79 & 0xfffffff);
uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
- uint32_t x136 = x79 & 0xfffffff;
+ uint32_t x136 = (x79 & 0xfffffff);
uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
- uint32_t x140 = x79 & 0xfffffff;
+ uint32_t x140 = (x79 & 0xfffffff);
uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
(Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
x
diff --git a/src/Specific/solinas32_2e450m2e225m1/freeze.c b/src/Specific/solinas32_2e450m2e225m1/freeze.c
new file mode 100644
index 000000000..1e830873a
--- /dev/null
+++ b/src/Specific/solinas32_2e450m2e225m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // freeze: takes 15 uint64_t value arguments and writes results through out. NOTE(review): no opening '{' follows this signature although the body ends with '}', so this is not valid C as written.
+out[0] = uint32_t x30; // NOTE(review): a declaration (`uint32_t x30`) sits where an expression is required; looks like unrendered generator output -- confirm against the generator's display log.
+out[1] = uint8_t x31 = Op Syntax.SubWithGetBorrow 30 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // NOTE(review): `Op Syntax.SubWithGetBorrow ...` is generator syntax, not C.
+out[2] = x2; // stores the last parameter unchanged
+out[3] = 0x3fffffff;; // 0x3fffffff == 2^30 - 1; the second ';' is an empty statement
+} // closes the function body (the matching '{' is missing above)
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e450m2e225m1/freeze.h b/src/Specific/solinas32_2e450m2e225m1/freeze.h
new file mode 100644
index 000000000..ffbccdea2
--- /dev/null
+++ b/src/Specific/solinas32_2e450m2e225m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x27, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype for freeze; parameter list must stay identical to the definition in freeze.c of this directory. force_inline expands to __attribute__((always_inline)).
diff --git a/src/Specific/solinas32_2e452m3/femul.c b/src/Specific/solinas32_2e452m3/femul.c
new file mode 100644
index 000000000..fd3b61467
--- /dev/null
+++ b/src/Specific/solinas32_2e452m3/femul.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // femul: takes two groups of 16 uint64_t arguments (x32..x5 and x62..x35) and writes 16 values to out[0..15]. NOTE(review): the body uses the non-C tokens `ℤ`, `+ℤ` and `*ℤ`, so it will not compile as-is; presumably the generator could not assign a fixed-width type there -- confirm.
+{ ℤ x64 = (((uint64_t)x5 * x62) +ℤ ((0x2 * ((uint64_t)x7 * x63)) +ℤ ((0x2 * ((uint64_t)x9 * x61)) +ℤ (((uint64_t)x11 * x59) +ℤ (((uint64_t)x13 * x57) +ℤ ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35)))))))))))))))); // x64..x79: each is a sum of products pairing one argument from the first group with one from the second; some products carry a factor 0x2, and from x65 on a second sum is scaled by 0x3
+{ ℤ x65 = ((((uint64_t)x5 * x63) +ℤ ((0x2 * ((uint64_t)x7 * x61)) +ℤ (((uint64_t)x9 * x59) +ℤ (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + ((0x2 * ((uint64_t)x15 * x53)) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) +ℤ (0x3 * ((uint64_t)x32 * x62)));
+{ ℤ x66 = ((((uint64_t)x5 * x61) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x31 * x35)))))))))))))) +ℤ (0x3 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+{ ℤ x67 = ((((uint64_t)x5 * x59) +ℤ ((0x2 * ((uint64_t)x7 * x57)) +ℤ ((0x2 * ((uint64_t)x9 * x55)) +ℤ ((0x2 * ((uint64_t)x11 * x53)) +ℤ (((uint64_t)x13 * x51) +ℤ ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) +ℤ (0x3 * ((0x2 * ((uint64_t)x31 * x62)) + ((0x2 * ((uint64_t)x33 * x63)) + (0x2 * ((uint64_t)x32 * x61))))));
+{ ℤ x68 = ((((uint64_t)x5 * x57) +ℤ ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x29 * x62) + ((0x2 * ((uint64_t)x31 * x63)) + ((0x2 * ((uint64_t)x33 * x61)) + ((uint64_t)x32 * x59))))));
+{ ℤ x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + ((0x2 * ((uint64_t)x31 * x61)) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+{ ℤ x70 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x23 * x35)))))))))) +ℤ (0x3 *ℤ (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+{ ℤ x71 = ((((uint64_t)x5 * x51) +ℤ ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x23 * x62)) + ((0x2 * ((uint64_t)x25 * x63)) + ((0x2 * ((uint64_t)x27 * x61)) + (((uint64_t)x29 * x59) + ((0x2 * ((uint64_t)x31 * x57)) + ((0x2 * ((uint64_t)x33 * x55)) + (0x2 * ((uint64_t)x32 * x53))))))))));
+{ ℤ x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) +ℤ (0x3 *ℤ (((uint64_t)x21 * x62) + ((0x2 * ((uint64_t)x23 * x63)) + ((0x2 * ((uint64_t)x25 * x61)) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + ((0x2 * ((uint64_t)x31 * x55)) + ((0x2 * ((uint64_t)x33 * x53)) + ((uint64_t)x32 * x51))))))))));
+{ ℤ x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) +ℤ (0x3 *ℤ (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + ((0x2 * ((uint64_t)x23 * x61)) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + ((0x2 * ((uint64_t)x31 * x53)) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+{ ℤ x74 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) +ℤ (0x3 *ℤ (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+{ ℤ x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x15 * x62)) +ℤ ((0x2 * ((uint64_t)x17 * x63)) +ℤ ((0x2 * ((uint64_t)x19 * x61)) + (((uint64_t)x21 * x59) + ((0x2 * ((uint64_t)x23 * x57)) + ((0x2 * ((uint64_t)x25 * x55)) + ((0x2 * ((uint64_t)x27 * x53)) + (((uint64_t)x29 * x51) + ((0x2 * ((uint64_t)x31 * x49)) + ((0x2 * ((uint64_t)x33 * x47)) + (0x2 * ((uint64_t)x32 * x45))))))))))))));
+{ ℤ x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x62) +ℤ ((0x2 * ((uint64_t)x15 * x63)) + ((0x2 * ((uint64_t)x17 * x61)) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + ((0x2 * ((uint64_t)x23 * x55)) + ((0x2 * ((uint64_t)x25 * x53)) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((0x2 * ((uint64_t)x31 * x47)) + ((0x2 * ((uint64_t)x33 * x45)) + ((uint64_t)x32 * x43))))))))))))));
+{ ℤ x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + ((0x2 * ((uint64_t)x15 * x61)) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + ((0x2 * ((uint64_t)x23 * x53)) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((0x2 * ((uint64_t)x31 * x45)) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+{ ℤ x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+{ ℤ x79 = (((uint64_t)x5 * x35) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x7 * x62)) +ℤ ((0x2 * ((uint64_t)x9 * x63)) +ℤ ((0x2 * ((uint64_t)x11 * x61)) +ℤ (((uint64_t)x13 * x59) +ℤ ((0x2 * ((uint64_t)x15 * x57)) +ℤ ((0x2 * ((uint64_t)x17 * x55)) +ℤ ((0x2 * ((uint64_t)x19 * x53)) + (((uint64_t)x21 * x51) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + (((uint64_t)x29 * x43) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+{ uint64_t x80 = (x79 >> 0x1d); // carry: everything in x79 above its low 29 bits
+{ uint32_t x81 = (x79 & 0x1fffffff); // low 29 bits of x79
+{ ℤ x82 = (x80 +ℤ x78); // carry is added into the next sum; this shift/mask/add step repeats through x126 with shift widths 0x1d, 0x1c, 0x1c, 0x1c in rotation
+{ uint64_t x83 = (x82 >> 0x1c);
+{ uint32_t x84 = (x82 & 0xfffffff);
+{ ℤ x85 = (x83 +ℤ x77);
+{ uint64_t x86 = (x85 >> 0x1c);
+{ uint32_t x87 = (x85 & 0xfffffff);
+{ ℤ x88 = (x86 +ℤ x76);
+{ uint64_t x89 = (x88 >> 0x1c);
+{ uint32_t x90 = (x88 & 0xfffffff);
+{ ℤ x91 = (x89 +ℤ x75);
+{ uint64_t x92 = (x91 >> 0x1d);
+{ uint32_t x93 = (x91 & 0x1fffffff);
+{ ℤ x94 = (x92 +ℤ x74);
+{ uint64_t x95 = (x94 >> 0x1c);
+{ uint32_t x96 = (x94 & 0xfffffff);
+{ ℤ x97 = (x95 +ℤ x73);
+{ uint64_t x98 = (x97 >> 0x1c);
+{ uint32_t x99 = (x97 & 0xfffffff);
+{ ℤ x100 = (x98 +ℤ x72);
+{ uint64_t x101 = (x100 >> 0x1c);
+{ uint32_t x102 = (x100 & 0xfffffff);
+{ ℤ x103 = (x101 +ℤ x71);
+{ uint64_t x104 = (x103 >> 0x1d);
+{ uint32_t x105 = (x103 & 0x1fffffff);
+{ ℤ x106 = (x104 +ℤ x70);
+{ uint64_t x107 = (x106 >> 0x1c);
+{ uint32_t x108 = (x106 & 0xfffffff);
+{ ℤ x109 = (x107 +ℤ x69);
+{ uint64_t x110 = (x109 >> 0x1c);
+{ uint32_t x111 = (x109 & 0xfffffff);
+{ ℤ x112 = (x110 +ℤ x68);
+{ uint64_t x113 = (x112 >> 0x1c);
+{ uint32_t x114 = (x112 & 0xfffffff);
+{ ℤ x115 = (x113 +ℤ x67);
+{ uint64_t x116 = (x115 >> 0x1d);
+{ uint32_t x117 = (x115 & 0x1fffffff);
+{ ℤ x118 = (x116 +ℤ x66);
+{ uint64_t x119 = (x118 >> 0x1c);
+{ uint32_t x120 = (x118 & 0xfffffff);
+{ ℤ x121 = (x119 +ℤ x65);
+{ uint64_t x122 = (x121 >> 0x1c);
+{ uint32_t x123 = (x121 & 0xfffffff);
+{ ℤ x124 = (x122 +ℤ x64);
+{ uint64_t x125 = (x124 >> 0x1c);
+{ uint32_t x126 = (x124 & 0xfffffff);
+{ uint64_t x127 = (x81 + (0x3 * x125)); // the carry out of the last sum (x125) is multiplied by 0x3 and added back onto the first low part x81
+{ uint32_t x128 = (uint32_t) (x127 >> 0x1d);
+{ uint32_t x129 = ((uint32_t)x127 & 0x1fffffff);
+{ uint32_t x130 = (x128 + x84); // carry from the wrapped value moves into the second low part
+{ uint32_t x131 = (x130 >> 0x1c);
+{ uint32_t x132 = (x130 & 0xfffffff);
+out[0] = x126; // out[0] holds the part derived from x64; out[15] at the end holds the part derived from x79
+out[1] = x123;
+out[2] = x120;
+out[3] = x117;
+out[4] = x114;
+out[5] = x111;
+out[6] = x108;
+out[7] = x105;
+out[8] = x102;
+out[9] = x99;
+out[10] = x96;
+out[11] = x93;
+out[12] = x90;
+out[13] = x131 + x87; // final carry is added in and stored without a further mask
+out[14] = x132;
+out[15] = x129;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e452m3/femul.h b/src/Specific/solinas32_2e452m3/femul.h
new file mode 100644
index 000000000..c4089fc7d
--- /dev/null
+++ b/src/Specific/solinas32_2e452m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // prototype for femul; parameter list must stay identical to the definition in femul.c of this directory. force_inline expands to __attribute__((always_inline)).
diff --git a/src/Specific/solinas32_2e452m3/femulDisplay.log b/src/Specific/solinas32_2e452m3/femulDisplay.log
index 0efe147f0..ef1ce0740 100644
--- a/src/Specific/solinas32_2e452m3/femulDisplay.log
+++ b/src/Specific/solinas32_2e452m3/femulDisplay.log
@@ -1,84 +1,76 @@
-λ x x0 : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+λ x x0 : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
Interp-η
(λ var : Syntax.base_type → Type,
- λ '(x36, x37, x35, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x70, x71, x69, x67, x65, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39))%core,
- uint64_t x72 = (uint64_t) x5 * x70 + (0x2 * ((uint64_t) x7 * x71) + (0x2 * ((uint64_t) x9 * x69) + (0x2 * ((uint64_t) x11 * x67) + (0x2 * ((uint64_t) x13 * x65) + (0x2 * ((uint64_t) x15 * x63) + (0x2 * ((uint64_t) x17 * x61) + (0x2 * ((uint64_t) x19 * x59) + ((uint64_t) x21 * x57 + ((uint64_t) x23 * x55 + (0x2 * ((uint64_t) x25 * x53) + (0x2 * ((uint64_t) x27 * x51) + (0x2 * ((uint64_t) x29 * x49) + (0x2 * ((uint64_t) x31 * x47) + (0x2 * ((uint64_t) x33 * x45) + (0x2 * ((uint64_t) x35 * x43) + (0x2 * ((uint64_t) x37 * x41) + (uint64_t) x36 * x39))))))))))))))));
- uint64_t x73 = (uint64_t) x5 * x71 + (0x2 * ((uint64_t) x7 * x69) + (0x2 * ((uint64_t) x9 * x67) + (0x2 * ((uint64_t) x11 * x65) + (0x2 * ((uint64_t) x13 * x63) + (0x2 * ((uint64_t) x15 * x61) + (0x2 * ((uint64_t) x17 * x59) + ((uint64_t) x19 * x57 + ((uint64_t) x21 * x55 + ((uint64_t) x23 * x53 + (0x2 * ((uint64_t) x25 * x51) + (0x2 * ((uint64_t) x27 * x49) + (0x2 * ((uint64_t) x29 * x47) + (0x2 * ((uint64_t) x31 * x45) + (0x2 * ((uint64_t) x33 * x43) + (0x2 * ((uint64_t) x35 * x41) + (uint64_t) x37 * x39))))))))))))))) + 0x3 * ((uint64_t) x36 * x70);
- uint64_t x74 = (uint64_t) x5 * x69 + (0x2 * ((uint64_t) x7 * x67) + (0x2 * ((uint64_t) x9 * x65) + (0x2 * ((uint64_t) x11 * x63) + (0x2 * ((uint64_t) x13 * x61) + (0x2 * ((uint64_t) x15 * x59) + ((uint64_t) x17 * x57 + ((uint64_t) x19 * x55 + ((uint64_t) x21 * x53 + ((uint64_t) x23 * x51 + (0x2 * ((uint64_t) x25 * x49) + (0x2 * ((uint64_t) x27 * x47) + (0x2 * ((uint64_t) x29 * x45) + (0x2 * ((uint64_t) x31 * x43) + (0x2 * ((uint64_t) x33 * x41) + (uint64_t) x35 * x39)))))))))))))) + 0x3 * ((uint64_t) x37 * x70 + (uint64_t) x36 * x71);
- uint64_t x75 = (uint64_t) x5 * x67 + (0x2 * ((uint64_t) x7 * x65) + (0x2 * ((uint64_t) x9 * x63) + (0x2 * ((uint64_t) x11 * x61) + (0x2 * ((uint64_t) x13 * x59) + ((uint64_t) x15 * x57 + ((uint64_t) x17 * x55 + ((uint64_t) x19 * x53 + ((uint64_t) x21 * x51 + ((uint64_t) x23 * x49 + (0x2 * ((uint64_t) x25 * x47) + (0x2 * ((uint64_t) x27 * x45) + (0x2 * ((uint64_t) x29 * x43) + (0x2 * ((uint64_t) x31 * x41) + (uint64_t) x33 * x39))))))))))))) + 0x3 * ((uint64_t) x35 * x70 + ((uint64_t) x37 * x71 + (uint64_t) x36 * x69));
- uint64_t x76 = (uint64_t) x5 * x65 + (0x2 * ((uint64_t) x7 * x63) + (0x2 * ((uint64_t) x9 * x61) + (0x2 * ((uint64_t) x11 * x59) + ((uint64_t) x13 * x57 + ((uint64_t) x15 * x55 + ((uint64_t) x17 * x53 + ((uint64_t) x19 * x51 + ((uint64_t) x21 * x49 + ((uint64_t) x23 * x47 + (0x2 * ((uint64_t) x25 * x45) + (0x2 * ((uint64_t) x27 * x43) + (0x2 * ((uint64_t) x29 * x41) + (uint64_t) x31 * x39)))))))))))) + 0x3 * ((uint64_t) x33 * x70 + ((uint64_t) x35 * x71 + ((uint64_t) x37 * x69 + (uint64_t) x36 * x67)));
- uint64_t x77 = (uint64_t) x5 * x63 + (0x2 * ((uint64_t) x7 * x61) + (0x2 * ((uint64_t) x9 * x59) + ((uint64_t) x11 * x57 + ((uint64_t) x13 * x55 + ((uint64_t) x15 * x53 + ((uint64_t) x17 * x51 + ((uint64_t) x19 * x49 + ((uint64_t) x21 * x47 + ((uint64_t) x23 * x45 + (0x2 * ((uint64_t) x25 * x43) + (0x2 * ((uint64_t) x27 * x41) + (uint64_t) x29 * x39))))))))))) + 0x3 * ((uint64_t) x31 * x70 + ((uint64_t) x33 * x71 + ((uint64_t) x35 * x69 + ((uint64_t) x37 * x67 + (uint64_t) x36 * x65))));
- uint64_t x78 = (uint64_t) x5 * x61 + (0x2 * ((uint64_t) x7 * x59) + ((uint64_t) x9 * x57 + ((uint64_t) x11 * x55 + ((uint64_t) x13 * x53 + ((uint64_t) x15 * x51 + ((uint64_t) x17 * x49 + ((uint64_t) x19 * x47 + ((uint64_t) x21 * x45 + ((uint64_t) x23 * x43 + (0x2 * ((uint64_t) x25 * x41) + (uint64_t) x27 * x39)))))))))) + 0x3 * ((uint64_t) x29 * x70 + ((uint64_t) x31 * x71 + ((uint64_t) x33 * x69 + ((uint64_t) x35 * x67 + ((uint64_t) x37 * x65 + (uint64_t) x36 * x63)))));
- uint64_t x79 = (uint64_t) x5 * x59 + ((uint64_t) x7 * x57 + ((uint64_t) x9 * x55 + ((uint64_t) x11 * x53 + ((uint64_t) x13 * x51 + ((uint64_t) x15 * x49 + ((uint64_t) x17 * x47 + ((uint64_t) x19 * x45 + ((uint64_t) x21 * x43 + ((uint64_t) x23 * x41 + (uint64_t) x25 * x39))))))))) + 0x3 * ((uint64_t) x27 * x70 + ((uint64_t) x29 * x71 + ((uint64_t) x31 * x69 + ((uint64_t) x33 * x67 + ((uint64_t) x35 * x65 + ((uint64_t) x37 * x63 + (uint64_t) x36 * x61))))));
- uint64_t x80 = (uint64_t) x5 * x57 + (0x2 * ((uint64_t) x7 * x55) + (0x2 * ((uint64_t) x9 * x53) + (0x2 * ((uint64_t) x11 * x51) + (0x2 * ((uint64_t) x13 * x49) + (0x2 * ((uint64_t) x15 * x47) + (0x2 * ((uint64_t) x17 * x45) + (0x2 * ((uint64_t) x19 * x43) + (0x2 * ((uint64_t) x21 * x41) + (uint64_t) x23 * x39)))))))) + 0x3 * (0x2 * ((uint64_t) x25 * x70) + (0x2 * ((uint64_t) x27 * x71) + (0x2 * ((uint64_t) x29 * x69) + (0x2 * ((uint64_t) x31 * x67) + (0x2 * ((uint64_t) x33 * x65) + (0x2 * ((uint64_t) x35 * x63) + (0x2 * ((uint64_t) x37 * x61) + 0x2 * ((uint64_t) x36 * x59))))))));
- uint64_t x81 = (uint64_t) x5 * x55 + (0x2 * ((uint64_t) x7 * x53) + (0x2 * ((uint64_t) x9 * x51) + (0x2 * ((uint64_t) x11 * x49) + (0x2 * ((uint64_t) x13 * x47) + (0x2 * ((uint64_t) x15 * x45) + (0x2 * ((uint64_t) x17 * x43) + (0x2 * ((uint64_t) x19 * x41) + (uint64_t) x21 * x39))))))) + 0x3 * ((uint64_t) x23 * x70 + (0x2 * ((uint64_t) x25 * x71) + (0x2 * ((uint64_t) x27 * x69) + (0x2 * ((uint64_t) x29 * x67) + (0x2 * ((uint64_t) x31 * x65) + (0x2 * ((uint64_t) x33 * x63) + (0x2 * ((uint64_t) x35 * x61) + (0x2 * ((uint64_t) x37 * x59) + (uint64_t) x36 * x57))))))));
- uint64_t x82 = (uint64_t) x5 * x53 + (0x2 * ((uint64_t) x7 * x51) + (0x2 * ((uint64_t) x9 * x49) + (0x2 * ((uint64_t) x11 * x47) + (0x2 * ((uint64_t) x13 * x45) + (0x2 * ((uint64_t) x15 * x43) + (0x2 * ((uint64_t) x17 * x41) + (uint64_t) x19 * x39)))))) + 0x3 * ((uint64_t) x21 * x70 + ((uint64_t) x23 * x71 + (0x2 * ((uint64_t) x25 * x69) + (0x2 * ((uint64_t) x27 * x67) + (0x2 * ((uint64_t) x29 * x65) + (0x2 * ((uint64_t) x31 * x63) + (0x2 * ((uint64_t) x33 * x61) + (0x2 * ((uint64_t) x35 * x59) + ((uint64_t) x37 * x57 + (uint64_t) x36 * x55)))))))));
- uint64_t x83 = (uint64_t) x5 * x51 + (0x2 * ((uint64_t) x7 * x49) + (0x2 * ((uint64_t) x9 * x47) + (0x2 * ((uint64_t) x11 * x45) + (0x2 * ((uint64_t) x13 * x43) + (0x2 * ((uint64_t) x15 * x41) + (uint64_t) x17 * x39))))) + 0x3 * ((uint64_t) x19 * x70 + ((uint64_t) x21 * x71 + ((uint64_t) x23 * x69 + (0x2 * ((uint64_t) x25 * x67) + (0x2 * ((uint64_t) x27 * x65) + (0x2 * ((uint64_t) x29 * x63) + (0x2 * ((uint64_t) x31 * x61) + (0x2 * ((uint64_t) x33 * x59) + ((uint64_t) x35 * x57 + ((uint64_t) x37 * x55 + (uint64_t) x36 * x53))))))))));
- uint64_t x84 = (uint64_t) x5 * x49 + (0x2 * ((uint64_t) x7 * x47) + (0x2 * ((uint64_t) x9 * x45) + (0x2 * ((uint64_t) x11 * x43) + (0x2 * ((uint64_t) x13 * x41) + (uint64_t) x15 * x39)))) + 0x3 * ((uint64_t) x17 * x70 + ((uint64_t) x19 * x71 + ((uint64_t) x21 * x69 + ((uint64_t) x23 * x67 + (0x2 * ((uint64_t) x25 * x65) + (0x2 * ((uint64_t) x27 * x63) + (0x2 * ((uint64_t) x29 * x61) + (0x2 * ((uint64_t) x31 * x59) + ((uint64_t) x33 * x57 + ((uint64_t) x35 * x55 + ((uint64_t) x37 * x53 + (uint64_t) x36 * x51)))))))))));
- uint64_t x85 = (uint64_t) x5 * x47 + (0x2 * ((uint64_t) x7 * x45) + (0x2 * ((uint64_t) x9 * x43) + (0x2 * ((uint64_t) x11 * x41) + (uint64_t) x13 * x39))) + 0x3 * ((uint64_t) x15 * x70 + ((uint64_t) x17 * x71 + ((uint64_t) x19 * x69 + ((uint64_t) x21 * x67 + ((uint64_t) x23 * x65 + (0x2 * ((uint64_t) x25 * x63) + (0x2 * ((uint64_t) x27 * x61) + (0x2 * ((uint64_t) x29 * x59) + ((uint64_t) x31 * x57 + ((uint64_t) x33 * x55 + ((uint64_t) x35 * x53 + ((uint64_t) x37 * x51 + (uint64_t) x36 * x49))))))))))));
- uint64_t x86 = (uint64_t) x5 * x45 + (0x2 * ((uint64_t) x7 * x43) + (0x2 * ((uint64_t) x9 * x41) + (uint64_t) x11 * x39)) + 0x3 * ((uint64_t) x13 * x70 + ((uint64_t) x15 * x71 + ((uint64_t) x17 * x69 + ((uint64_t) x19 * x67 + ((uint64_t) x21 * x65 + ((uint64_t) x23 * x63 + (0x2 * ((uint64_t) x25 * x61) + (0x2 * ((uint64_t) x27 * x59) + ((uint64_t) x29 * x57 + ((uint64_t) x31 * x55 + ((uint64_t) x33 * x53 + ((uint64_t) x35 * x51 + ((uint64_t) x37 * x49 + (uint64_t) x36 * x47)))))))))))));
- uint64_t x87 = (uint64_t) x5 * x43 + (0x2 * ((uint64_t) x7 * x41) + (uint64_t) x9 * x39) + 0x3 * ((uint64_t) x11 * x70 + ((uint64_t) x13 * x71 + ((uint64_t) x15 * x69 + ((uint64_t) x17 * x67 + ((uint64_t) x19 * x65 + ((uint64_t) x21 * x63 + ((uint64_t) x23 * x61 + (0x2 * ((uint64_t) x25 * x59) + ((uint64_t) x27 * x57 + ((uint64_t) x29 * x55 + ((uint64_t) x31 * x53 + ((uint64_t) x33 * x51 + ((uint64_t) x35 * x49 + ((uint64_t) x37 * x47 + (uint64_t) x36 * x45))))))))))))));
- uint64_t x88 = (uint64_t) x5 * x41 + (uint64_t) x7 * x39 + 0x3 * ((uint64_t) x9 * x70 + ((uint64_t) x11 * x71 + ((uint64_t) x13 * x69 + ((uint64_t) x15 * x67 + ((uint64_t) x17 * x65 + ((uint64_t) x19 * x63 + ((uint64_t) x21 * x61 + ((uint64_t) x23 * x59 + ((uint64_t) x25 * x57 + ((uint64_t) x27 * x55 + ((uint64_t) x29 * x53 + ((uint64_t) x31 * x51 + ((uint64_t) x33 * x49 + ((uint64_t) x35 * x47 + ((uint64_t) x37 * x45 + (uint64_t) x36 * x43)))))))))))))));
- uint64_t x89 = (uint64_t) x5 * x39 + 0x3 * (0x2 * ((uint64_t) x7 * x70) + (0x2 * ((uint64_t) x9 * x71) + (0x2 * ((uint64_t) x11 * x69) + (0x2 * ((uint64_t) x13 * x67) + (0x2 * ((uint64_t) x15 * x65) + (0x2 * ((uint64_t) x17 * x63) + (0x2 * ((uint64_t) x19 * x61) + (0x2 * ((uint64_t) x21 * x59) + ((uint64_t) x23 * x57 + (0x2 * ((uint64_t) x25 * x55) + (0x2 * ((uint64_t) x27 * x53) + (0x2 * ((uint64_t) x29 * x51) + (0x2 * ((uint64_t) x31 * x49) + (0x2 * ((uint64_t) x33 * x47) + (0x2 * ((uint64_t) x35 * x45) + (0x2 * ((uint64_t) x37 * x43) + 0x2 * ((uint64_t) x36 * x41)))))))))))))))));
- uint32_t x90 = (uint32_t) (x89 >> 0x1a);
- uint32_t x91 = (uint32_t) x89 & 0x3ffffff;
- uint64_t x92 = x90 + x88;
- uint32_t x93 = (uint32_t) (x92 >> 0x19);
- uint32_t x94 = (uint32_t) x92 & 0x1ffffff;
- uint64_t x95 = x93 + x87;
- uint32_t x96 = (uint32_t) (x95 >> 0x19);
- uint32_t x97 = (uint32_t) x95 & 0x1ffffff;
- uint64_t x98 = x96 + x86;
- uint32_t x99 = (uint32_t) (x98 >> 0x19);
- uint32_t x100 = (uint32_t) x98 & 0x1ffffff;
- uint64_t x101 = x99 + x85;
- uint32_t x102 = (uint32_t) (x101 >> 0x19);
- uint32_t x103 = (uint32_t) x101 & 0x1ffffff;
- uint64_t x104 = x102 + x84;
- uint32_t x105 = (uint32_t) (x104 >> 0x19);
- uint32_t x106 = (uint32_t) x104 & 0x1ffffff;
- uint64_t x107 = x105 + x83;
- uint32_t x108 = (uint32_t) (x107 >> 0x19);
- uint32_t x109 = (uint32_t) x107 & 0x1ffffff;
- uint64_t x110 = x108 + x82;
- uint32_t x111 = (uint32_t) (x110 >> 0x19);
- uint32_t x112 = (uint32_t) x110 & 0x1ffffff;
- uint64_t x113 = x111 + x81;
- uint32_t x114 = (uint32_t) (x113 >> 0x19);
- uint32_t x115 = (uint32_t) x113 & 0x1ffffff;
- uint64_t x116 = x114 + x80;
- uint32_t x117 = (uint32_t) (x116 >> 0x1a);
- uint32_t x118 = (uint32_t) x116 & 0x3ffffff;
- uint64_t x119 = x117 + x79;
- uint32_t x120 = (uint32_t) (x119 >> 0x19);
- uint32_t x121 = (uint32_t) x119 & 0x1ffffff;
- uint64_t x122 = x120 + x78;
- uint32_t x123 = (uint32_t) (x122 >> 0x19);
- uint32_t x124 = (uint32_t) x122 & 0x1ffffff;
- uint64_t x125 = x123 + x77;
- uint32_t x126 = (uint32_t) (x125 >> 0x19);
- uint32_t x127 = (uint32_t) x125 & 0x1ffffff;
- uint64_t x128 = x126 + x76;
- uint32_t x129 = (uint32_t) (x128 >> 0x19);
- uint32_t x130 = (uint32_t) x128 & 0x1ffffff;
- uint64_t x131 = x129 + x75;
- uint32_t x132 = (uint32_t) (x131 >> 0x19);
- uint32_t x133 = (uint32_t) x131 & 0x1ffffff;
- uint64_t x134 = x132 + x74;
- uint32_t x135 = (uint32_t) (x134 >> 0x19);
- uint32_t x136 = (uint32_t) x134 & 0x1ffffff;
- uint64_t x137 = x135 + x73;
- uint32_t x138 = (uint32_t) (x137 >> 0x19);
- uint32_t x139 = (uint32_t) x137 & 0x1ffffff;
- uint64_t x140 = x138 + x72;
- uint32_t x141 = (uint32_t) (x140 >> 0x19);
- uint32_t x142 = (uint32_t) x140 & 0x1ffffff;
- uint64_t x143 = x91 + (uint64_t) 0x3 * x141;
- uint32_t x144 = (uint32_t) (x143 >> 0x1a);
- uint32_t x145 = (uint32_t) x143 & 0x3ffffff;
- uint32_t x146 = x144 + x94;
- uint32_t x147 = x146 >> 0x19;
- uint32_t x148 = x146 & 0x1ffffff;
- return (Return x142, Return x139, Return x136, Return x133, Return x130, Return x127, Return x124, Return x121, Return x118, Return x115, Return x112, Return x109, Return x106, Return x103, Return x100, x147 + x97, Return x148, Return x145))
+ λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
+ ℤ x64 = (((uint64_t)x5 * x62) +ℤ ((0x2 * ((uint64_t)x7 * x63)) +ℤ ((0x2 * ((uint64_t)x9 * x61)) +ℤ (((uint64_t)x11 * x59) +ℤ (((uint64_t)x13 * x57) +ℤ ((0x2 * ((uint64_t)x15 * x55)) + ((0x2 * ((uint64_t)x17 * x53)) + (((uint64_t)x19 * x51) + (((uint64_t)x21 * x49) + ((0x2 * ((uint64_t)x23 * x47)) + ((0x2 * ((uint64_t)x25 * x45)) + (((uint64_t)x27 * x43) + (((uint64_t)x29 * x41) + ((0x2 * ((uint64_t)x31 * x39)) + ((0x2 * ((uint64_t)x33 * x37)) + ((uint64_t)x32 * x35))))))))))))))));
+ ℤ x65 = ((((uint64_t)x5 * x63) +ℤ ((0x2 * ((uint64_t)x7 * x61)) +ℤ (((uint64_t)x9 * x59) +ℤ (((uint64_t)x11 * x57) + (((uint64_t)x13 * x55) + ((0x2 * ((uint64_t)x15 * x53)) + (((uint64_t)x17 * x51) + (((uint64_t)x19 * x49) + (((uint64_t)x21 * x47) + ((0x2 * ((uint64_t)x23 * x45)) + (((uint64_t)x25 * x43) + (((uint64_t)x27 * x41) + (((uint64_t)x29 * x39) + ((0x2 * ((uint64_t)x31 * x37)) + ((uint64_t)x33 * x35))))))))))))))) +ℤ (0x3 * ((uint64_t)x32 * x62)));
+ ℤ x66 = ((((uint64_t)x5 * x61) + (((uint64_t)x7 * x59) + (((uint64_t)x9 * x57) + (((uint64_t)x11 * x55) + (((uint64_t)x13 * x53) + (((uint64_t)x15 * x51) + (((uint64_t)x17 * x49) + (((uint64_t)x19 * x47) + (((uint64_t)x21 * x45) + (((uint64_t)x23 * x43) + (((uint64_t)x25 * x41) + (((uint64_t)x27 * x39) + (((uint64_t)x29 * x37) + ((uint64_t)x31 * x35)))))))))))))) +ℤ (0x3 * (((uint64_t)x33 * x62) + ((uint64_t)x32 * x63))));
+ ℤ x67 = ((((uint64_t)x5 * x59) +ℤ ((0x2 * ((uint64_t)x7 * x57)) +ℤ ((0x2 * ((uint64_t)x9 * x55)) +ℤ ((0x2 * ((uint64_t)x11 * x53)) +ℤ (((uint64_t)x13 * x51) +ℤ ((0x2 * ((uint64_t)x15 * x49)) + ((0x2 * ((uint64_t)x17 * x47)) + ((0x2 * ((uint64_t)x19 * x45)) + (((uint64_t)x21 * x43) + ((0x2 * ((uint64_t)x23 * x41)) + ((0x2 * ((uint64_t)x25 * x39)) + ((0x2 * ((uint64_t)x27 * x37)) + ((uint64_t)x29 * x35))))))))))))) +ℤ (0x3 * ((0x2 * ((uint64_t)x31 * x62)) + ((0x2 * ((uint64_t)x33 * x63)) + (0x2 * ((uint64_t)x32 * x61))))));
+ ℤ x68 = ((((uint64_t)x5 * x57) +ℤ ((0x2 * ((uint64_t)x7 * x55)) + ((0x2 * ((uint64_t)x9 * x53)) + (((uint64_t)x11 * x51) + (((uint64_t)x13 * x49) + ((0x2 * ((uint64_t)x15 * x47)) + ((0x2 * ((uint64_t)x17 * x45)) + (((uint64_t)x19 * x43) + (((uint64_t)x21 * x41) + ((0x2 * ((uint64_t)x23 * x39)) + ((0x2 * ((uint64_t)x25 * x37)) + ((uint64_t)x27 * x35)))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x29 * x62) + ((0x2 * ((uint64_t)x31 * x63)) + ((0x2 * ((uint64_t)x33 * x61)) + ((uint64_t)x32 * x59))))));
+ ℤ x69 = ((((uint64_t)x5 * x55) + ((0x2 * ((uint64_t)x7 * x53)) + (((uint64_t)x9 * x51) + (((uint64_t)x11 * x49) + (((uint64_t)x13 * x47) + ((0x2 * ((uint64_t)x15 * x45)) + (((uint64_t)x17 * x43) + (((uint64_t)x19 * x41) + (((uint64_t)x21 * x39) + ((0x2 * ((uint64_t)x23 * x37)) + ((uint64_t)x25 * x35))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x27 * x62) + (((uint64_t)x29 * x63) + ((0x2 * ((uint64_t)x31 * x61)) + (((uint64_t)x33 * x59) + ((uint64_t)x32 * x57)))))));
+ ℤ x70 = ((((uint64_t)x5 * x53) + (((uint64_t)x7 * x51) + (((uint64_t)x9 * x49) + (((uint64_t)x11 * x47) + (((uint64_t)x13 * x45) + (((uint64_t)x15 * x43) + (((uint64_t)x17 * x41) + (((uint64_t)x19 * x39) + (((uint64_t)x21 * x37) + ((uint64_t)x23 * x35)))))))))) +ℤ (0x3 *ℤ (((uint64_t)x25 * x62) + (((uint64_t)x27 * x63) + (((uint64_t)x29 * x61) + (((uint64_t)x31 * x59) + (((uint64_t)x33 * x57) + ((uint64_t)x32 * x55))))))));
+ ℤ x71 = ((((uint64_t)x5 * x51) +ℤ ((0x2 * ((uint64_t)x7 * x49)) + ((0x2 * ((uint64_t)x9 * x47)) + ((0x2 * ((uint64_t)x11 * x45)) + (((uint64_t)x13 * x43) + ((0x2 * ((uint64_t)x15 * x41)) + ((0x2 * ((uint64_t)x17 * x39)) + ((0x2 * ((uint64_t)x19 * x37)) + ((uint64_t)x21 * x35))))))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x23 * x62)) + ((0x2 * ((uint64_t)x25 * x63)) + ((0x2 * ((uint64_t)x27 * x61)) + (((uint64_t)x29 * x59) + ((0x2 * ((uint64_t)x31 * x57)) + ((0x2 * ((uint64_t)x33 * x55)) + (0x2 * ((uint64_t)x32 * x53))))))))));
+ ℤ x72 = ((((uint64_t)x5 * x49) + ((0x2 * ((uint64_t)x7 * x47)) + ((0x2 * ((uint64_t)x9 * x45)) + (((uint64_t)x11 * x43) + (((uint64_t)x13 * x41) + ((0x2 * ((uint64_t)x15 * x39)) + ((0x2 * ((uint64_t)x17 * x37)) + ((uint64_t)x19 * x35)))))))) +ℤ (0x3 *ℤ (((uint64_t)x21 * x62) + ((0x2 * ((uint64_t)x23 * x63)) + ((0x2 * ((uint64_t)x25 * x61)) + (((uint64_t)x27 * x59) + (((uint64_t)x29 * x57) + ((0x2 * ((uint64_t)x31 * x55)) + ((0x2 * ((uint64_t)x33 * x53)) + ((uint64_t)x32 * x51))))))))));
+ ℤ x73 = ((((uint64_t)x5 * x47) + ((0x2 * ((uint64_t)x7 * x45)) + (((uint64_t)x9 * x43) + (((uint64_t)x11 * x41) + (((uint64_t)x13 * x39) + ((0x2 * ((uint64_t)x15 * x37)) + ((uint64_t)x17 * x35))))))) +ℤ (0x3 *ℤ (((uint64_t)x19 * x62) + (((uint64_t)x21 * x63) + ((0x2 * ((uint64_t)x23 * x61)) + (((uint64_t)x25 * x59) + (((uint64_t)x27 * x57) + (((uint64_t)x29 * x55) + ((0x2 * ((uint64_t)x31 * x53)) + (((uint64_t)x33 * x51) + ((uint64_t)x32 * x49)))))))))));
+ ℤ x74 = ((((uint64_t)x5 * x45) + (((uint64_t)x7 * x43) + (((uint64_t)x9 * x41) + (((uint64_t)x11 * x39) + (((uint64_t)x13 * x37) + ((uint64_t)x15 * x35)))))) +ℤ (0x3 *ℤ (((uint64_t)x17 * x62) + (((uint64_t)x19 * x63) + (((uint64_t)x21 * x61) + (((uint64_t)x23 * x59) + (((uint64_t)x25 * x57) + (((uint64_t)x27 * x55) + (((uint64_t)x29 * x53) + (((uint64_t)x31 * x51) + (((uint64_t)x33 * x49) + ((uint64_t)x32 * x47))))))))))));
+ ℤ x75 = ((((uint64_t)x5 * x43) + ((0x2 * ((uint64_t)x7 * x41)) + ((0x2 * ((uint64_t)x9 * x39)) + ((0x2 * ((uint64_t)x11 * x37)) + ((uint64_t)x13 * x35))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x15 * x62)) +ℤ ((0x2 * ((uint64_t)x17 * x63)) +ℤ ((0x2 * ((uint64_t)x19 * x61)) + (((uint64_t)x21 * x59) + ((0x2 * ((uint64_t)x23 * x57)) + ((0x2 * ((uint64_t)x25 * x55)) + ((0x2 * ((uint64_t)x27 * x53)) + (((uint64_t)x29 * x51) + ((0x2 * ((uint64_t)x31 * x49)) + ((0x2 * ((uint64_t)x33 * x47)) + (0x2 * ((uint64_t)x32 * x45))))))))))))));
+ ℤ x76 = ((((uint64_t)x5 * x41) + ((0x2 * ((uint64_t)x7 * x39)) + ((0x2 * ((uint64_t)x9 * x37)) + ((uint64_t)x11 * x35)))) +ℤ (0x3 *ℤ (((uint64_t)x13 * x62) +ℤ ((0x2 * ((uint64_t)x15 * x63)) + ((0x2 * ((uint64_t)x17 * x61)) + (((uint64_t)x19 * x59) + (((uint64_t)x21 * x57) + ((0x2 * ((uint64_t)x23 * x55)) + ((0x2 * ((uint64_t)x25 * x53)) + (((uint64_t)x27 * x51) + (((uint64_t)x29 * x49) + ((0x2 * ((uint64_t)x31 * x47)) + ((0x2 * ((uint64_t)x33 * x45)) + ((uint64_t)x32 * x43))))))))))))));
+ ℤ x77 = ((((uint64_t)x5 * x39) + ((0x2 * ((uint64_t)x7 * x37)) + ((uint64_t)x9 * x35))) +ℤ (0x3 *ℤ (((uint64_t)x11 * x62) + (((uint64_t)x13 * x63) + ((0x2 * ((uint64_t)x15 * x61)) + (((uint64_t)x17 * x59) + (((uint64_t)x19 * x57) + (((uint64_t)x21 * x55) + ((0x2 * ((uint64_t)x23 * x53)) + (((uint64_t)x25 * x51) + (((uint64_t)x27 * x49) + (((uint64_t)x29 * x47) + ((0x2 * ((uint64_t)x31 * x45)) + (((uint64_t)x33 * x43) + ((uint64_t)x32 * x41)))))))))))))));
+ ℤ x78 = ((((uint64_t)x5 * x37) + ((uint64_t)x7 * x35)) +ℤ (0x3 *ℤ (((uint64_t)x9 * x62) + (((uint64_t)x11 * x63) + (((uint64_t)x13 * x61) + (((uint64_t)x15 * x59) + (((uint64_t)x17 * x57) + (((uint64_t)x19 * x55) + (((uint64_t)x21 * x53) + (((uint64_t)x23 * x51) + (((uint64_t)x25 * x49) + (((uint64_t)x27 * x47) + (((uint64_t)x29 * x45) + (((uint64_t)x31 * x43) + (((uint64_t)x33 * x41) + ((uint64_t)x32 * x39))))))))))))))));
+ ℤ x79 = (((uint64_t)x5 * x35) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x7 * x62)) +ℤ ((0x2 * ((uint64_t)x9 * x63)) +ℤ ((0x2 * ((uint64_t)x11 * x61)) +ℤ (((uint64_t)x13 * x59) +ℤ ((0x2 * ((uint64_t)x15 * x57)) +ℤ ((0x2 * ((uint64_t)x17 * x55)) +ℤ ((0x2 * ((uint64_t)x19 * x53)) + (((uint64_t)x21 * x51) + ((0x2 * ((uint64_t)x23 * x49)) + ((0x2 * ((uint64_t)x25 * x47)) + ((0x2 * ((uint64_t)x27 * x45)) + (((uint64_t)x29 * x43) + ((0x2 * ((uint64_t)x31 * x41)) + ((0x2 * ((uint64_t)x33 * x39)) + (0x2 * ((uint64_t)x32 * x37))))))))))))))))));
+ uint64_t x80 = (x79 >> 0x1d);
+ uint32_t x81 = (x79 & 0x1fffffff);
+ ℤ x82 = (x80 +ℤ x78);
+ uint64_t x83 = (x82 >> 0x1c);
+ uint32_t x84 = (x82 & 0xfffffff);
+ ℤ x85 = (x83 +ℤ x77);
+ uint64_t x86 = (x85 >> 0x1c);
+ uint32_t x87 = (x85 & 0xfffffff);
+ ℤ x88 = (x86 +ℤ x76);
+ uint64_t x89 = (x88 >> 0x1c);
+ uint32_t x90 = (x88 & 0xfffffff);
+ ℤ x91 = (x89 +ℤ x75);
+ uint64_t x92 = (x91 >> 0x1d);
+ uint32_t x93 = (x91 & 0x1fffffff);
+ ℤ x94 = (x92 +ℤ x74);
+ uint64_t x95 = (x94 >> 0x1c);
+ uint32_t x96 = (x94 & 0xfffffff);
+ ℤ x97 = (x95 +ℤ x73);
+ uint64_t x98 = (x97 >> 0x1c);
+ uint32_t x99 = (x97 & 0xfffffff);
+ ℤ x100 = (x98 +ℤ x72);
+ uint64_t x101 = (x100 >> 0x1c);
+ uint32_t x102 = (x100 & 0xfffffff);
+ ℤ x103 = (x101 +ℤ x71);
+ uint64_t x104 = (x103 >> 0x1d);
+ uint32_t x105 = (x103 & 0x1fffffff);
+ ℤ x106 = (x104 +ℤ x70);
+ uint64_t x107 = (x106 >> 0x1c);
+ uint32_t x108 = (x106 & 0xfffffff);
+ ℤ x109 = (x107 +ℤ x69);
+ uint64_t x110 = (x109 >> 0x1c);
+ uint32_t x111 = (x109 & 0xfffffff);
+ ℤ x112 = (x110 +ℤ x68);
+ uint64_t x113 = (x112 >> 0x1c);
+ uint32_t x114 = (x112 & 0xfffffff);
+ ℤ x115 = (x113 +ℤ x67);
+ uint64_t x116 = (x115 >> 0x1d);
+ uint32_t x117 = (x115 & 0x1fffffff);
+ ℤ x118 = (x116 +ℤ x66);
+ uint64_t x119 = (x118 >> 0x1c);
+ uint32_t x120 = (x118 & 0xfffffff);
+ ℤ x121 = (x119 +ℤ x65);
+ uint64_t x122 = (x121 >> 0x1c);
+ uint32_t x123 = (x121 & 0xfffffff);
+ ℤ x124 = (x122 +ℤ x64);
+ uint64_t x125 = (x124 >> 0x1c);
+ uint32_t x126 = (x124 & 0xfffffff);
+ uint64_t x127 = (x81 + (0x3 * x125));
+ uint32_t x128 = (uint32_t) (x127 >> 0x1d);
+ uint32_t x129 = ((uint32_t)x127 & 0x1fffffff);
+ uint32_t x130 = (x128 + x84);
+ uint32_t x131 = (x130 >> 0x1c);
+ uint32_t x132 = (x130 & 0xfffffff);
+ return (Return x126, Return x123, Return x120, Return x117, Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, (x131 + x87), Return x132, Return x129))
(x, x0)%core
- : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e452m3/fesquare.c b/src/Specific/solinas32_2e452m3/fesquare.c
new file mode 100644
index 000000000..31169c7b1
--- /dev/null
+++ b/src/Specific/solinas32_2e452m3/fesquare.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI))); // 128-bit unsigned type requested through GCC's TI machine mode; not referenced by the code visible in this file
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// GCC only (clang and ICC are excluded by the condition above): alias the _subborrow intrinsics to the builtins; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline // discard any earlier definition so the one on the next line is the one in effect
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 16-limb value passed as scalars, top limb first (x29 is the highest limb, x2 the lowest), and stores 16 result limbs in out[0..15]; presumably arithmetic modulo 2^452 - 3, going by the directory name and the 0x3 wrap factor (TODO confirm).
+{ ℤ x31 = (((uint64_t)x2 * x29) +ℤ ((0x2 * ((uint64_t)x4 * x30)) +ℤ ((0x2 * ((uint64_t)x6 * x28)) +ℤ (((uint64_t)x8 * x26) +ℤ (((uint64_t)x10 * x24) +ℤ ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2)))))))))))))))); // x31..x46 are the 16 product columns, top column first. NOTE(review): ℤ and +ℤ are not C; presumably generator placeholders for values wider than 64 bits, so this file will not compile as is (confirm).
+{ ℤ x32 = ((((uint64_t)x2 * x30) +ℤ ((0x2 * ((uint64_t)x4 * x28)) +ℤ (((uint64_t)x6 * x26) +ℤ (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) +ℤ (0x3 * ((uint64_t)x29 * x29))); // from here on each column also adds 0x3 times a second group of products
+{ ℤ x33 = ((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2)))))))))))))) +ℤ (0x3 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+{ ℤ x34 = ((((uint64_t)x2 * x26) +ℤ ((0x2 * ((uint64_t)x4 * x24)) +ℤ ((0x2 * ((uint64_t)x6 * x22)) +ℤ ((0x2 * ((uint64_t)x8 * x20)) +ℤ (((uint64_t)x10 * x18) +ℤ ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) +ℤ (0x3 * ((0x2 * ((uint64_t)x28 * x29)) + ((0x2 * ((uint64_t)x30 * x30)) + (0x2 * ((uint64_t)x29 * x28))))));
+{ ℤ x35 = ((((uint64_t)x2 * x24) +ℤ ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x26 * x29) + ((0x2 * ((uint64_t)x28 * x30)) + ((0x2 * ((uint64_t)x30 * x28)) + ((uint64_t)x29 * x26))))));
+{ ℤ x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + ((0x2 * ((uint64_t)x28 * x28)) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+{ ℤ x37 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) +ℤ (0x3 *ℤ (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+{ ℤ x38 = ((((uint64_t)x2 * x18) +ℤ ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x20 * x29)) + ((0x2 * ((uint64_t)x22 * x30)) + ((0x2 * ((uint64_t)x24 * x28)) + (((uint64_t)x26 * x26) + ((0x2 * ((uint64_t)x28 * x24)) + ((0x2 * ((uint64_t)x30 * x22)) + (0x2 * ((uint64_t)x29 * x20))))))))));
+{ ℤ x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) +ℤ (0x3 *ℤ (((uint64_t)x18 * x29) + ((0x2 * ((uint64_t)x20 * x30)) + ((0x2 * ((uint64_t)x22 * x28)) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((0x2 * ((uint64_t)x28 * x22)) + ((0x2 * ((uint64_t)x30 * x20)) + ((uint64_t)x29 * x18))))))))));
+{ ℤ x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) +ℤ (0x3 *ℤ (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + ((0x2 * ((uint64_t)x20 * x28)) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((0x2 * ((uint64_t)x28 * x20)) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+{ ℤ x41 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ (0x3 *ℤ (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+{ ℤ x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x12 * x29)) +ℤ ((0x2 * ((uint64_t)x14 * x30)) +ℤ ((0x2 * ((uint64_t)x16 * x28)) + (((uint64_t)x18 * x26) + ((0x2 * ((uint64_t)x20 * x24)) + ((0x2 * ((uint64_t)x22 * x22)) + ((0x2 * ((uint64_t)x24 * x20)) + (((uint64_t)x26 * x18) + ((0x2 * ((uint64_t)x28 * x16)) + ((0x2 * ((uint64_t)x30 * x14)) + (0x2 * ((uint64_t)x29 * x12))))))))))))));
+{ ℤ x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x29) +ℤ ((0x2 * ((uint64_t)x12 * x30)) + ((0x2 * ((uint64_t)x14 * x28)) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + ((0x2 * ((uint64_t)x28 * x14)) + ((0x2 * ((uint64_t)x30 * x12)) + ((uint64_t)x29 * x10))))))))))))));
+{ ℤ x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + ((0x2 * ((uint64_t)x12 * x28)) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + ((0x2 * ((uint64_t)x28 * x12)) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+{ ℤ x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+{ ℤ x46 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x4 * x29)) +ℤ ((0x2 * ((uint64_t)x6 * x30)) +ℤ ((0x2 * ((uint64_t)x8 * x28)) +ℤ (((uint64_t)x10 * x26) +ℤ ((0x2 * ((uint64_t)x12 * x24)) +ℤ ((0x2 * ((uint64_t)x14 * x22)) +ℤ ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + (((uint64_t)x26 * x10) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4)))))))))))))))))); // lowest column: x2*x2 plus 0x3 times the remaining cross terms
+{ uint64_t x47 = (x46 >> 0x1d); // carry chain starts at the lowest column; shift/mask widths repeat as 0x1d, 0x1c, 0x1c, 0x1c
+{ uint32_t x48 = (x46 & 0x1fffffff);
+{ ℤ x49 = (x47 +ℤ x45);
+{ uint64_t x50 = (x49 >> 0x1c);
+{ uint32_t x51 = (x49 & 0xfffffff);
+{ ℤ x52 = (x50 +ℤ x44);
+{ uint64_t x53 = (x52 >> 0x1c);
+{ uint32_t x54 = (x52 & 0xfffffff);
+{ ℤ x55 = (x53 +ℤ x43);
+{ uint64_t x56 = (x55 >> 0x1c);
+{ uint32_t x57 = (x55 & 0xfffffff);
+{ ℤ x58 = (x56 +ℤ x42);
+{ uint64_t x59 = (x58 >> 0x1d);
+{ uint32_t x60 = (x58 & 0x1fffffff);
+{ ℤ x61 = (x59 +ℤ x41);
+{ uint64_t x62 = (x61 >> 0x1c);
+{ uint32_t x63 = (x61 & 0xfffffff);
+{ ℤ x64 = (x62 +ℤ x40);
+{ uint64_t x65 = (x64 >> 0x1c);
+{ uint32_t x66 = (x64 & 0xfffffff);
+{ ℤ x67 = (x65 +ℤ x39);
+{ uint64_t x68 = (x67 >> 0x1c);
+{ uint32_t x69 = (x67 & 0xfffffff);
+{ ℤ x70 = (x68 +ℤ x38);
+{ uint64_t x71 = (x70 >> 0x1d);
+{ uint32_t x72 = (x70 & 0x1fffffff);
+{ ℤ x73 = (x71 +ℤ x37);
+{ uint64_t x74 = (x73 >> 0x1c);
+{ uint32_t x75 = (x73 & 0xfffffff);
+{ ℤ x76 = (x74 +ℤ x36);
+{ uint64_t x77 = (x76 >> 0x1c);
+{ uint32_t x78 = (x76 & 0xfffffff);
+{ ℤ x79 = (x77 +ℤ x35);
+{ uint64_t x80 = (x79 >> 0x1c);
+{ uint32_t x81 = (x79 & 0xfffffff);
+{ ℤ x82 = (x80 +ℤ x34);
+{ uint64_t x83 = (x82 >> 0x1d);
+{ uint32_t x84 = (x82 & 0x1fffffff);
+{ ℤ x85 = (x83 +ℤ x33);
+{ uint64_t x86 = (x85 >> 0x1c);
+{ uint32_t x87 = (x85 & 0xfffffff);
+{ ℤ x88 = (x86 +ℤ x32);
+{ uint64_t x89 = (x88 >> 0x1c);
+{ uint32_t x90 = (x88 & 0xfffffff);
+{ ℤ x91 = (x89 +ℤ x31);
+{ uint64_t x92 = (x91 >> 0x1c);
+{ uint32_t x93 = (x91 & 0xfffffff);
+{ uint64_t x94 = (x48 + (0x3 * x92)); // the carry out of the top column (x92) is multiplied by 0x3 and added to the lowest limb x48
+{ uint32_t x95 = (uint32_t) (x94 >> 0x1d);
+{ uint32_t x96 = ((uint32_t)x94 & 0x1fffffff);
+{ uint32_t x97 = (x95 + x51); // one further carry step into the second-lowest limb
+{ uint32_t x98 = (x97 >> 0x1c);
+{ uint32_t x99 = (x97 & 0xfffffff);
+out[0] = x93; // out[0] receives the limb from the top column; out[15] the lowest limb
+out[1] = x90;
+out[2] = x87;
+out[3] = x84;
+out[4] = x81;
+out[5] = x78;
+out[6] = x75;
+out[7] = x72;
+out[8] = x69;
+out[9] = x66;
+out[10] = x63;
+out[11] = x60;
+out[12] = x57;
+out[13] = x98 + x54; // the last carry x98 is added to x54; the sum is stored without another mask
+out[14] = x99;
+out[15] = x96;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e452m3/fesquare.h b/src/Specific/solinas32_2e452m3/fesquare.h
new file mode 100644
index 000000000..c86247b3d
--- /dev/null
+++ b/src/Specific/solinas32_2e452m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline // discard any earlier definition so the one on the next line is the one in effect
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the definition in fesquare.c: takes 16 scalar limb arguments and an output pointer; fesquare.c notes the caller supplies uint64_t out[16].
diff --git a/src/Specific/solinas32_2e452m3/fesquareDisplay.log b/src/Specific/solinas32_2e452m3/fesquareDisplay.log
index c66ada6b6..b85722267 100644
--- a/src/Specific/solinas32_2e452m3/fesquareDisplay.log
+++ b/src/Specific/solinas32_2e452m3/fesquareDisplay.log
@@ -1,84 +1,76 @@
-λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
Interp-η
(λ var : Syntax.base_type → Type,
- λ '(x33, x34, x32, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
- uint64_t x35 = (uint64_t) x2 * x33 + (0x2 * ((uint64_t) x4 * x34) + (0x2 * ((uint64_t) x6 * x32) + (0x2 * ((uint64_t) x8 * x30) + (0x2 * ((uint64_t) x10 * x28) + (0x2 * ((uint64_t) x12 * x26) + (0x2 * ((uint64_t) x14 * x24) + (0x2 * ((uint64_t) x16 * x22) + ((uint64_t) x18 * x20 + ((uint64_t) x20 * x18 + (0x2 * ((uint64_t) x22 * x16) + (0x2 * ((uint64_t) x24 * x14) + (0x2 * ((uint64_t) x26 * x12) + (0x2 * ((uint64_t) x28 * x10) + (0x2 * ((uint64_t) x30 * x8) + (0x2 * ((uint64_t) x32 * x6) + (0x2 * ((uint64_t) x34 * x4) + (uint64_t) x33 * x2))))))))))))))));
- uint64_t x36 = (uint64_t) x2 * x34 + (0x2 * ((uint64_t) x4 * x32) + (0x2 * ((uint64_t) x6 * x30) + (0x2 * ((uint64_t) x8 * x28) + (0x2 * ((uint64_t) x10 * x26) + (0x2 * ((uint64_t) x12 * x24) + (0x2 * ((uint64_t) x14 * x22) + ((uint64_t) x16 * x20 + ((uint64_t) x18 * x18 + ((uint64_t) x20 * x16 + (0x2 * ((uint64_t) x22 * x14) + (0x2 * ((uint64_t) x24 * x12) + (0x2 * ((uint64_t) x26 * x10) + (0x2 * ((uint64_t) x28 * x8) + (0x2 * ((uint64_t) x30 * x6) + (0x2 * ((uint64_t) x32 * x4) + (uint64_t) x34 * x2))))))))))))))) + 0x3 * ((uint64_t) x33 * x33);
- uint64_t x37 = (uint64_t) x2 * x32 + (0x2 * ((uint64_t) x4 * x30) + (0x2 * ((uint64_t) x6 * x28) + (0x2 * ((uint64_t) x8 * x26) + (0x2 * ((uint64_t) x10 * x24) + (0x2 * ((uint64_t) x12 * x22) + ((uint64_t) x14 * x20 + ((uint64_t) x16 * x18 + ((uint64_t) x18 * x16 + ((uint64_t) x20 * x14 + (0x2 * ((uint64_t) x22 * x12) + (0x2 * ((uint64_t) x24 * x10) + (0x2 * ((uint64_t) x26 * x8) + (0x2 * ((uint64_t) x28 * x6) + (0x2 * ((uint64_t) x30 * x4) + (uint64_t) x32 * x2)))))))))))))) + 0x3 * ((uint64_t) x34 * x33 + (uint64_t) x33 * x34);
- uint64_t x38 = (uint64_t) x2 * x30 + (0x2 * ((uint64_t) x4 * x28) + (0x2 * ((uint64_t) x6 * x26) + (0x2 * ((uint64_t) x8 * x24) + (0x2 * ((uint64_t) x10 * x22) + ((uint64_t) x12 * x20 + ((uint64_t) x14 * x18 + ((uint64_t) x16 * x16 + ((uint64_t) x18 * x14 + ((uint64_t) x20 * x12 + (0x2 * ((uint64_t) x22 * x10) + (0x2 * ((uint64_t) x24 * x8) + (0x2 * ((uint64_t) x26 * x6) + (0x2 * ((uint64_t) x28 * x4) + (uint64_t) x30 * x2))))))))))))) + 0x3 * ((uint64_t) x32 * x33 + ((uint64_t) x34 * x34 + (uint64_t) x33 * x32));
- uint64_t x39 = (uint64_t) x2 * x28 + (0x2 * ((uint64_t) x4 * x26) + (0x2 * ((uint64_t) x6 * x24) + (0x2 * ((uint64_t) x8 * x22) + ((uint64_t) x10 * x20 + ((uint64_t) x12 * x18 + ((uint64_t) x14 * x16 + ((uint64_t) x16 * x14 + ((uint64_t) x18 * x12 + ((uint64_t) x20 * x10 + (0x2 * ((uint64_t) x22 * x8) + (0x2 * ((uint64_t) x24 * x6) + (0x2 * ((uint64_t) x26 * x4) + (uint64_t) x28 * x2)))))))))))) + 0x3 * ((uint64_t) x30 * x33 + ((uint64_t) x32 * x34 + ((uint64_t) x34 * x32 + (uint64_t) x33 * x30)));
- uint64_t x40 = (uint64_t) x2 * x26 + (0x2 * ((uint64_t) x4 * x24) + (0x2 * ((uint64_t) x6 * x22) + ((uint64_t) x8 * x20 + ((uint64_t) x10 * x18 + ((uint64_t) x12 * x16 + ((uint64_t) x14 * x14 + ((uint64_t) x16 * x12 + ((uint64_t) x18 * x10 + ((uint64_t) x20 * x8 + (0x2 * ((uint64_t) x22 * x6) + (0x2 * ((uint64_t) x24 * x4) + (uint64_t) x26 * x2))))))))))) + 0x3 * ((uint64_t) x28 * x33 + ((uint64_t) x30 * x34 + ((uint64_t) x32 * x32 + ((uint64_t) x34 * x30 + (uint64_t) x33 * x28))));
- uint64_t x41 = (uint64_t) x2 * x24 + (0x2 * ((uint64_t) x4 * x22) + ((uint64_t) x6 * x20 + ((uint64_t) x8 * x18 + ((uint64_t) x10 * x16 + ((uint64_t) x12 * x14 + ((uint64_t) x14 * x12 + ((uint64_t) x16 * x10 + ((uint64_t) x18 * x8 + ((uint64_t) x20 * x6 + (0x2 * ((uint64_t) x22 * x4) + (uint64_t) x24 * x2)))))))))) + 0x3 * ((uint64_t) x26 * x33 + ((uint64_t) x28 * x34 + ((uint64_t) x30 * x32 + ((uint64_t) x32 * x30 + ((uint64_t) x34 * x28 + (uint64_t) x33 * x26)))));
- uint64_t x42 = (uint64_t) x2 * x22 + ((uint64_t) x4 * x20 + ((uint64_t) x6 * x18 + ((uint64_t) x8 * x16 + ((uint64_t) x10 * x14 + ((uint64_t) x12 * x12 + ((uint64_t) x14 * x10 + ((uint64_t) x16 * x8 + ((uint64_t) x18 * x6 + ((uint64_t) x20 * x4 + (uint64_t) x22 * x2))))))))) + 0x3 * ((uint64_t) x24 * x33 + ((uint64_t) x26 * x34 + ((uint64_t) x28 * x32 + ((uint64_t) x30 * x30 + ((uint64_t) x32 * x28 + ((uint64_t) x34 * x26 + (uint64_t) x33 * x24))))));
- uint64_t x43 = (uint64_t) x2 * x20 + (0x2 * ((uint64_t) x4 * x18) + (0x2 * ((uint64_t) x6 * x16) + (0x2 * ((uint64_t) x8 * x14) + (0x2 * ((uint64_t) x10 * x12) + (0x2 * ((uint64_t) x12 * x10) + (0x2 * ((uint64_t) x14 * x8) + (0x2 * ((uint64_t) x16 * x6) + (0x2 * ((uint64_t) x18 * x4) + (uint64_t) x20 * x2)))))))) + 0x3 * (0x2 * ((uint64_t) x22 * x33) + (0x2 * ((uint64_t) x24 * x34) + (0x2 * ((uint64_t) x26 * x32) + (0x2 * ((uint64_t) x28 * x30) + (0x2 * ((uint64_t) x30 * x28) + (0x2 * ((uint64_t) x32 * x26) + (0x2 * ((uint64_t) x34 * x24) + 0x2 * ((uint64_t) x33 * x22))))))));
- uint64_t x44 = (uint64_t) x2 * x18 + (0x2 * ((uint64_t) x4 * x16) + (0x2 * ((uint64_t) x6 * x14) + (0x2 * ((uint64_t) x8 * x12) + (0x2 * ((uint64_t) x10 * x10) + (0x2 * ((uint64_t) x12 * x8) + (0x2 * ((uint64_t) x14 * x6) + (0x2 * ((uint64_t) x16 * x4) + (uint64_t) x18 * x2))))))) + 0x3 * ((uint64_t) x20 * x33 + (0x2 * ((uint64_t) x22 * x34) + (0x2 * ((uint64_t) x24 * x32) + (0x2 * ((uint64_t) x26 * x30) + (0x2 * ((uint64_t) x28 * x28) + (0x2 * ((uint64_t) x30 * x26) + (0x2 * ((uint64_t) x32 * x24) + (0x2 * ((uint64_t) x34 * x22) + (uint64_t) x33 * x20))))))));
- uint64_t x45 = (uint64_t) x2 * x16 + (0x2 * ((uint64_t) x4 * x14) + (0x2 * ((uint64_t) x6 * x12) + (0x2 * ((uint64_t) x8 * x10) + (0x2 * ((uint64_t) x10 * x8) + (0x2 * ((uint64_t) x12 * x6) + (0x2 * ((uint64_t) x14 * x4) + (uint64_t) x16 * x2)))))) + 0x3 * ((uint64_t) x18 * x33 + ((uint64_t) x20 * x34 + (0x2 * ((uint64_t) x22 * x32) + (0x2 * ((uint64_t) x24 * x30) + (0x2 * ((uint64_t) x26 * x28) + (0x2 * ((uint64_t) x28 * x26) + (0x2 * ((uint64_t) x30 * x24) + (0x2 * ((uint64_t) x32 * x22) + ((uint64_t) x34 * x20 + (uint64_t) x33 * x18)))))))));
- uint64_t x46 = (uint64_t) x2 * x14 + (0x2 * ((uint64_t) x4 * x12) + (0x2 * ((uint64_t) x6 * x10) + (0x2 * ((uint64_t) x8 * x8) + (0x2 * ((uint64_t) x10 * x6) + (0x2 * ((uint64_t) x12 * x4) + (uint64_t) x14 * x2))))) + 0x3 * ((uint64_t) x16 * x33 + ((uint64_t) x18 * x34 + ((uint64_t) x20 * x32 + (0x2 * ((uint64_t) x22 * x30) + (0x2 * ((uint64_t) x24 * x28) + (0x2 * ((uint64_t) x26 * x26) + (0x2 * ((uint64_t) x28 * x24) + (0x2 * ((uint64_t) x30 * x22) + ((uint64_t) x32 * x20 + ((uint64_t) x34 * x18 + (uint64_t) x33 * x16))))))))));
- uint64_t x47 = (uint64_t) x2 * x12 + (0x2 * ((uint64_t) x4 * x10) + (0x2 * ((uint64_t) x6 * x8) + (0x2 * ((uint64_t) x8 * x6) + (0x2 * ((uint64_t) x10 * x4) + (uint64_t) x12 * x2)))) + 0x3 * ((uint64_t) x14 * x33 + ((uint64_t) x16 * x34 + ((uint64_t) x18 * x32 + ((uint64_t) x20 * x30 + (0x2 * ((uint64_t) x22 * x28) + (0x2 * ((uint64_t) x24 * x26) + (0x2 * ((uint64_t) x26 * x24) + (0x2 * ((uint64_t) x28 * x22) + ((uint64_t) x30 * x20 + ((uint64_t) x32 * x18 + ((uint64_t) x34 * x16 + (uint64_t) x33 * x14)))))))))));
- uint64_t x48 = (uint64_t) x2 * x10 + (0x2 * ((uint64_t) x4 * x8) + (0x2 * ((uint64_t) x6 * x6) + (0x2 * ((uint64_t) x8 * x4) + (uint64_t) x10 * x2))) + 0x3 * ((uint64_t) x12 * x33 + ((uint64_t) x14 * x34 + ((uint64_t) x16 * x32 + ((uint64_t) x18 * x30 + ((uint64_t) x20 * x28 + (0x2 * ((uint64_t) x22 * x26) + (0x2 * ((uint64_t) x24 * x24) + (0x2 * ((uint64_t) x26 * x22) + ((uint64_t) x28 * x20 + ((uint64_t) x30 * x18 + ((uint64_t) x32 * x16 + ((uint64_t) x34 * x14 + (uint64_t) x33 * x12))))))))))));
- uint64_t x49 = (uint64_t) x2 * x8 + (0x2 * ((uint64_t) x4 * x6) + (0x2 * ((uint64_t) x6 * x4) + (uint64_t) x8 * x2)) + 0x3 * ((uint64_t) x10 * x33 + ((uint64_t) x12 * x34 + ((uint64_t) x14 * x32 + ((uint64_t) x16 * x30 + ((uint64_t) x18 * x28 + ((uint64_t) x20 * x26 + (0x2 * ((uint64_t) x22 * x24) + (0x2 * ((uint64_t) x24 * x22) + ((uint64_t) x26 * x20 + ((uint64_t) x28 * x18 + ((uint64_t) x30 * x16 + ((uint64_t) x32 * x14 + ((uint64_t) x34 * x12 + (uint64_t) x33 * x10)))))))))))));
- uint64_t x50 = (uint64_t) x2 * x6 + (0x2 * ((uint64_t) x4 * x4) + (uint64_t) x6 * x2) + 0x3 * ((uint64_t) x8 * x33 + ((uint64_t) x10 * x34 + ((uint64_t) x12 * x32 + ((uint64_t) x14 * x30 + ((uint64_t) x16 * x28 + ((uint64_t) x18 * x26 + ((uint64_t) x20 * x24 + (0x2 * ((uint64_t) x22 * x22) + ((uint64_t) x24 * x20 + ((uint64_t) x26 * x18 + ((uint64_t) x28 * x16 + ((uint64_t) x30 * x14 + ((uint64_t) x32 * x12 + ((uint64_t) x34 * x10 + (uint64_t) x33 * x8))))))))))))));
- uint64_t x51 = (uint64_t) x2 * x4 + (uint64_t) x4 * x2 + 0x3 * ((uint64_t) x6 * x33 + ((uint64_t) x8 * x34 + ((uint64_t) x10 * x32 + ((uint64_t) x12 * x30 + ((uint64_t) x14 * x28 + ((uint64_t) x16 * x26 + ((uint64_t) x18 * x24 + ((uint64_t) x20 * x22 + ((uint64_t) x22 * x20 + ((uint64_t) x24 * x18 + ((uint64_t) x26 * x16 + ((uint64_t) x28 * x14 + ((uint64_t) x30 * x12 + ((uint64_t) x32 * x10 + ((uint64_t) x34 * x8 + (uint64_t) x33 * x6)))))))))))))));
- uint64_t x52 = (uint64_t) x2 * x2 + 0x3 * (0x2 * ((uint64_t) x4 * x33) + (0x2 * ((uint64_t) x6 * x34) + (0x2 * ((uint64_t) x8 * x32) + (0x2 * ((uint64_t) x10 * x30) + (0x2 * ((uint64_t) x12 * x28) + (0x2 * ((uint64_t) x14 * x26) + (0x2 * ((uint64_t) x16 * x24) + (0x2 * ((uint64_t) x18 * x22) + ((uint64_t) x20 * x20 + (0x2 * ((uint64_t) x22 * x18) + (0x2 * ((uint64_t) x24 * x16) + (0x2 * ((uint64_t) x26 * x14) + (0x2 * ((uint64_t) x28 * x12) + (0x2 * ((uint64_t) x30 * x10) + (0x2 * ((uint64_t) x32 * x8) + (0x2 * ((uint64_t) x34 * x6) + 0x2 * ((uint64_t) x33 * x4)))))))))))))))));
- uint32_t x53 = (uint32_t) (x52 >> 0x1a);
- uint32_t x54 = (uint32_t) x52 & 0x3ffffff;
- uint64_t x55 = x53 + x51;
- uint32_t x56 = (uint32_t) (x55 >> 0x19);
- uint32_t x57 = (uint32_t) x55 & 0x1ffffff;
- uint64_t x58 = x56 + x50;
- uint32_t x59 = (uint32_t) (x58 >> 0x19);
- uint32_t x60 = (uint32_t) x58 & 0x1ffffff;
- uint64_t x61 = x59 + x49;
- uint32_t x62 = (uint32_t) (x61 >> 0x19);
- uint32_t x63 = (uint32_t) x61 & 0x1ffffff;
- uint64_t x64 = x62 + x48;
- uint32_t x65 = (uint32_t) (x64 >> 0x19);
- uint32_t x66 = (uint32_t) x64 & 0x1ffffff;
- uint64_t x67 = x65 + x47;
- uint32_t x68 = (uint32_t) (x67 >> 0x19);
- uint32_t x69 = (uint32_t) x67 & 0x1ffffff;
- uint64_t x70 = x68 + x46;
- uint32_t x71 = (uint32_t) (x70 >> 0x19);
- uint32_t x72 = (uint32_t) x70 & 0x1ffffff;
- uint64_t x73 = x71 + x45;
- uint32_t x74 = (uint32_t) (x73 >> 0x19);
- uint32_t x75 = (uint32_t) x73 & 0x1ffffff;
- uint64_t x76 = x74 + x44;
- uint32_t x77 = (uint32_t) (x76 >> 0x19);
- uint32_t x78 = (uint32_t) x76 & 0x1ffffff;
- uint64_t x79 = x77 + x43;
- uint32_t x80 = (uint32_t) (x79 >> 0x1a);
- uint32_t x81 = (uint32_t) x79 & 0x3ffffff;
- uint64_t x82 = x80 + x42;
- uint32_t x83 = (uint32_t) (x82 >> 0x19);
- uint32_t x84 = (uint32_t) x82 & 0x1ffffff;
- uint64_t x85 = x83 + x41;
- uint32_t x86 = (uint32_t) (x85 >> 0x19);
- uint32_t x87 = (uint32_t) x85 & 0x1ffffff;
- uint64_t x88 = x86 + x40;
- uint32_t x89 = (uint32_t) (x88 >> 0x19);
- uint32_t x90 = (uint32_t) x88 & 0x1ffffff;
- uint64_t x91 = x89 + x39;
- uint32_t x92 = (uint32_t) (x91 >> 0x19);
- uint32_t x93 = (uint32_t) x91 & 0x1ffffff;
- uint64_t x94 = x92 + x38;
- uint32_t x95 = (uint32_t) (x94 >> 0x19);
- uint32_t x96 = (uint32_t) x94 & 0x1ffffff;
- uint64_t x97 = x95 + x37;
- uint32_t x98 = (uint32_t) (x97 >> 0x19);
- uint32_t x99 = (uint32_t) x97 & 0x1ffffff;
- uint64_t x100 = x98 + x36;
- uint32_t x101 = (uint32_t) (x100 >> 0x19);
- uint32_t x102 = (uint32_t) x100 & 0x1ffffff;
- uint64_t x103 = x101 + x35;
- uint32_t x104 = (uint32_t) (x103 >> 0x19);
- uint32_t x105 = (uint32_t) x103 & 0x1ffffff;
- uint64_t x106 = x54 + (uint64_t) 0x3 * x104;
- uint32_t x107 = (uint32_t) (x106 >> 0x1a);
- uint32_t x108 = (uint32_t) x106 & 0x3ffffff;
- uint32_t x109 = x107 + x57;
- uint32_t x110 = x109 >> 0x19;
- uint32_t x111 = x109 & 0x1ffffff;
- return (Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, Return x72, Return x69, Return x66, Return x63, x110 + x60, Return x111, Return x108))
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ ℤ x31 = (((uint64_t)x2 * x29) +ℤ ((0x2 * ((uint64_t)x4 * x30)) +ℤ ((0x2 * ((uint64_t)x6 * x28)) +ℤ (((uint64_t)x8 * x26) +ℤ (((uint64_t)x10 * x24) +ℤ ((0x2 * ((uint64_t)x12 * x22)) + ((0x2 * ((uint64_t)x14 * x20)) + (((uint64_t)x16 * x18) + (((uint64_t)x18 * x16) + ((0x2 * ((uint64_t)x20 * x14)) + ((0x2 * ((uint64_t)x22 * x12)) + (((uint64_t)x24 * x10) + (((uint64_t)x26 * x8) + ((0x2 * ((uint64_t)x28 * x6)) + ((0x2 * ((uint64_t)x30 * x4)) + ((uint64_t)x29 * x2))))))))))))))));
+ ℤ x32 = ((((uint64_t)x2 * x30) +ℤ ((0x2 * ((uint64_t)x4 * x28)) +ℤ (((uint64_t)x6 * x26) +ℤ (((uint64_t)x8 * x24) + (((uint64_t)x10 * x22) + ((0x2 * ((uint64_t)x12 * x20)) + (((uint64_t)x14 * x18) + (((uint64_t)x16 * x16) + (((uint64_t)x18 * x14) + ((0x2 * ((uint64_t)x20 * x12)) + (((uint64_t)x22 * x10) + (((uint64_t)x24 * x8) + (((uint64_t)x26 * x6) + ((0x2 * ((uint64_t)x28 * x4)) + ((uint64_t)x30 * x2))))))))))))))) +ℤ (0x3 * ((uint64_t)x29 * x29)));
+ ℤ x33 = ((((uint64_t)x2 * x28) + (((uint64_t)x4 * x26) + (((uint64_t)x6 * x24) + (((uint64_t)x8 * x22) + (((uint64_t)x10 * x20) + (((uint64_t)x12 * x18) + (((uint64_t)x14 * x16) + (((uint64_t)x16 * x14) + (((uint64_t)x18 * x12) + (((uint64_t)x20 * x10) + (((uint64_t)x22 * x8) + (((uint64_t)x24 * x6) + (((uint64_t)x26 * x4) + ((uint64_t)x28 * x2)))))))))))))) +ℤ (0x3 * (((uint64_t)x30 * x29) + ((uint64_t)x29 * x30))));
+ ℤ x34 = ((((uint64_t)x2 * x26) +ℤ ((0x2 * ((uint64_t)x4 * x24)) +ℤ ((0x2 * ((uint64_t)x6 * x22)) +ℤ ((0x2 * ((uint64_t)x8 * x20)) +ℤ (((uint64_t)x10 * x18) +ℤ ((0x2 * ((uint64_t)x12 * x16)) + ((0x2 * ((uint64_t)x14 * x14)) + ((0x2 * ((uint64_t)x16 * x12)) + (((uint64_t)x18 * x10) + ((0x2 * ((uint64_t)x20 * x8)) + ((0x2 * ((uint64_t)x22 * x6)) + ((0x2 * ((uint64_t)x24 * x4)) + ((uint64_t)x26 * x2))))))))))))) +ℤ (0x3 * ((0x2 * ((uint64_t)x28 * x29)) + ((0x2 * ((uint64_t)x30 * x30)) + (0x2 * ((uint64_t)x29 * x28))))));
+ ℤ x35 = ((((uint64_t)x2 * x24) +ℤ ((0x2 * ((uint64_t)x4 * x22)) + ((0x2 * ((uint64_t)x6 * x20)) + (((uint64_t)x8 * x18) + (((uint64_t)x10 * x16) + ((0x2 * ((uint64_t)x12 * x14)) + ((0x2 * ((uint64_t)x14 * x12)) + (((uint64_t)x16 * x10) + (((uint64_t)x18 * x8) + ((0x2 * ((uint64_t)x20 * x6)) + ((0x2 * ((uint64_t)x22 * x4)) + ((uint64_t)x24 * x2)))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x26 * x29) + ((0x2 * ((uint64_t)x28 * x30)) + ((0x2 * ((uint64_t)x30 * x28)) + ((uint64_t)x29 * x26))))));
+ ℤ x36 = ((((uint64_t)x2 * x22) + ((0x2 * ((uint64_t)x4 * x20)) + (((uint64_t)x6 * x18) + (((uint64_t)x8 * x16) + (((uint64_t)x10 * x14) + ((0x2 * ((uint64_t)x12 * x12)) + (((uint64_t)x14 * x10) + (((uint64_t)x16 * x8) + (((uint64_t)x18 * x6) + ((0x2 * ((uint64_t)x20 * x4)) + ((uint64_t)x22 * x2))))))))))) +ℤ (0x3 *ℤ (((uint64_t)x24 * x29) + (((uint64_t)x26 * x30) + ((0x2 * ((uint64_t)x28 * x28)) + (((uint64_t)x30 * x26) + ((uint64_t)x29 * x24)))))));
+ ℤ x37 = ((((uint64_t)x2 * x20) + (((uint64_t)x4 * x18) + (((uint64_t)x6 * x16) + (((uint64_t)x8 * x14) + (((uint64_t)x10 * x12) + (((uint64_t)x12 * x10) + (((uint64_t)x14 * x8) + (((uint64_t)x16 * x6) + (((uint64_t)x18 * x4) + ((uint64_t)x20 * x2)))))))))) +ℤ (0x3 *ℤ (((uint64_t)x22 * x29) + (((uint64_t)x24 * x30) + (((uint64_t)x26 * x28) + (((uint64_t)x28 * x26) + (((uint64_t)x30 * x24) + ((uint64_t)x29 * x22))))))));
+ ℤ x38 = ((((uint64_t)x2 * x18) +ℤ ((0x2 * ((uint64_t)x4 * x16)) + ((0x2 * ((uint64_t)x6 * x14)) + ((0x2 * ((uint64_t)x8 * x12)) + (((uint64_t)x10 * x10) + ((0x2 * ((uint64_t)x12 * x8)) + ((0x2 * ((uint64_t)x14 * x6)) + ((0x2 * ((uint64_t)x16 * x4)) + ((uint64_t)x18 * x2))))))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x20 * x29)) + ((0x2 * ((uint64_t)x22 * x30)) + ((0x2 * ((uint64_t)x24 * x28)) + (((uint64_t)x26 * x26) + ((0x2 * ((uint64_t)x28 * x24)) + ((0x2 * ((uint64_t)x30 * x22)) + (0x2 * ((uint64_t)x29 * x20))))))))));
+ ℤ x39 = ((((uint64_t)x2 * x16) + ((0x2 * ((uint64_t)x4 * x14)) + ((0x2 * ((uint64_t)x6 * x12)) + (((uint64_t)x8 * x10) + (((uint64_t)x10 * x8) + ((0x2 * ((uint64_t)x12 * x6)) + ((0x2 * ((uint64_t)x14 * x4)) + ((uint64_t)x16 * x2)))))))) +ℤ (0x3 *ℤ (((uint64_t)x18 * x29) + ((0x2 * ((uint64_t)x20 * x30)) + ((0x2 * ((uint64_t)x22 * x28)) + (((uint64_t)x24 * x26) + (((uint64_t)x26 * x24) + ((0x2 * ((uint64_t)x28 * x22)) + ((0x2 * ((uint64_t)x30 * x20)) + ((uint64_t)x29 * x18))))))))));
+ ℤ x40 = ((((uint64_t)x2 * x14) + ((0x2 * ((uint64_t)x4 * x12)) + (((uint64_t)x6 * x10) + (((uint64_t)x8 * x8) + (((uint64_t)x10 * x6) + ((0x2 * ((uint64_t)x12 * x4)) + ((uint64_t)x14 * x2))))))) +ℤ (0x3 *ℤ (((uint64_t)x16 * x29) + (((uint64_t)x18 * x30) + ((0x2 * ((uint64_t)x20 * x28)) + (((uint64_t)x22 * x26) + (((uint64_t)x24 * x24) + (((uint64_t)x26 * x22) + ((0x2 * ((uint64_t)x28 * x20)) + (((uint64_t)x30 * x18) + ((uint64_t)x29 * x16)))))))))));
+ ℤ x41 = ((((uint64_t)x2 * x12) + (((uint64_t)x4 * x10) + (((uint64_t)x6 * x8) + (((uint64_t)x8 * x6) + (((uint64_t)x10 * x4) + ((uint64_t)x12 * x2)))))) +ℤ (0x3 *ℤ (((uint64_t)x14 * x29) + (((uint64_t)x16 * x30) + (((uint64_t)x18 * x28) + (((uint64_t)x20 * x26) + (((uint64_t)x22 * x24) + (((uint64_t)x24 * x22) + (((uint64_t)x26 * x20) + (((uint64_t)x28 * x18) + (((uint64_t)x30 * x16) + ((uint64_t)x29 * x14))))))))))));
+ ℤ x42 = ((((uint64_t)x2 * x10) + ((0x2 * ((uint64_t)x4 * x8)) + ((0x2 * ((uint64_t)x6 * x6)) + ((0x2 * ((uint64_t)x8 * x4)) + ((uint64_t)x10 * x2))))) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x12 * x29)) +ℤ ((0x2 * ((uint64_t)x14 * x30)) +ℤ ((0x2 * ((uint64_t)x16 * x28)) + (((uint64_t)x18 * x26) + ((0x2 * ((uint64_t)x20 * x24)) + ((0x2 * ((uint64_t)x22 * x22)) + ((0x2 * ((uint64_t)x24 * x20)) + (((uint64_t)x26 * x18) + ((0x2 * ((uint64_t)x28 * x16)) + ((0x2 * ((uint64_t)x30 * x14)) + (0x2 * ((uint64_t)x29 * x12))))))))))))));
+ ℤ x43 = ((((uint64_t)x2 * x8) + ((0x2 * ((uint64_t)x4 * x6)) + ((0x2 * ((uint64_t)x6 * x4)) + ((uint64_t)x8 * x2)))) +ℤ (0x3 *ℤ (((uint64_t)x10 * x29) +ℤ ((0x2 * ((uint64_t)x12 * x30)) + ((0x2 * ((uint64_t)x14 * x28)) + (((uint64_t)x16 * x26) + (((uint64_t)x18 * x24) + ((0x2 * ((uint64_t)x20 * x22)) + ((0x2 * ((uint64_t)x22 * x20)) + (((uint64_t)x24 * x18) + (((uint64_t)x26 * x16) + ((0x2 * ((uint64_t)x28 * x14)) + ((0x2 * ((uint64_t)x30 * x12)) + ((uint64_t)x29 * x10))))))))))))));
+ ℤ x44 = ((((uint64_t)x2 * x6) + ((0x2 * ((uint64_t)x4 * x4)) + ((uint64_t)x6 * x2))) +ℤ (0x3 *ℤ (((uint64_t)x8 * x29) + (((uint64_t)x10 * x30) + ((0x2 * ((uint64_t)x12 * x28)) + (((uint64_t)x14 * x26) + (((uint64_t)x16 * x24) + (((uint64_t)x18 * x22) + ((0x2 * ((uint64_t)x20 * x20)) + (((uint64_t)x22 * x18) + (((uint64_t)x24 * x16) + (((uint64_t)x26 * x14) + ((0x2 * ((uint64_t)x28 * x12)) + (((uint64_t)x30 * x10) + ((uint64_t)x29 * x8)))))))))))))));
+ ℤ x45 = ((((uint64_t)x2 * x4) + ((uint64_t)x4 * x2)) +ℤ (0x3 *ℤ (((uint64_t)x6 * x29) + (((uint64_t)x8 * x30) + (((uint64_t)x10 * x28) + (((uint64_t)x12 * x26) + (((uint64_t)x14 * x24) + (((uint64_t)x16 * x22) + (((uint64_t)x18 * x20) + (((uint64_t)x20 * x18) + (((uint64_t)x22 * x16) + (((uint64_t)x24 * x14) + (((uint64_t)x26 * x12) + (((uint64_t)x28 * x10) + (((uint64_t)x30 * x8) + ((uint64_t)x29 * x6))))))))))))))));
+ ℤ x46 = (((uint64_t)x2 * x2) +ℤ (0x3 *ℤ ((0x2 * ((uint64_t)x4 * x29)) +ℤ ((0x2 * ((uint64_t)x6 * x30)) +ℤ ((0x2 * ((uint64_t)x8 * x28)) +ℤ (((uint64_t)x10 * x26) +ℤ ((0x2 * ((uint64_t)x12 * x24)) +ℤ ((0x2 * ((uint64_t)x14 * x22)) +ℤ ((0x2 * ((uint64_t)x16 * x20)) + (((uint64_t)x18 * x18) + ((0x2 * ((uint64_t)x20 * x16)) + ((0x2 * ((uint64_t)x22 * x14)) + ((0x2 * ((uint64_t)x24 * x12)) + (((uint64_t)x26 * x10) + ((0x2 * ((uint64_t)x28 * x8)) + ((0x2 * ((uint64_t)x30 * x6)) + (0x2 * ((uint64_t)x29 * x4))))))))))))))))));
+ uint64_t x47 = (x46 >> 0x1d);
+ uint32_t x48 = (x46 & 0x1fffffff);
+ ℤ x49 = (x47 +ℤ x45);
+ uint64_t x50 = (x49 >> 0x1c);
+ uint32_t x51 = (x49 & 0xfffffff);
+ ℤ x52 = (x50 +ℤ x44);
+ uint64_t x53 = (x52 >> 0x1c);
+ uint32_t x54 = (x52 & 0xfffffff);
+ ℤ x55 = (x53 +ℤ x43);
+ uint64_t x56 = (x55 >> 0x1c);
+ uint32_t x57 = (x55 & 0xfffffff);
+ ℤ x58 = (x56 +ℤ x42);
+ uint64_t x59 = (x58 >> 0x1d);
+ uint32_t x60 = (x58 & 0x1fffffff);
+ ℤ x61 = (x59 +ℤ x41);
+ uint64_t x62 = (x61 >> 0x1c);
+ uint32_t x63 = (x61 & 0xfffffff);
+ ℤ x64 = (x62 +ℤ x40);
+ uint64_t x65 = (x64 >> 0x1c);
+ uint32_t x66 = (x64 & 0xfffffff);
+ ℤ x67 = (x65 +ℤ x39);
+ uint64_t x68 = (x67 >> 0x1c);
+ uint32_t x69 = (x67 & 0xfffffff);
+ ℤ x70 = (x68 +ℤ x38);
+ uint64_t x71 = (x70 >> 0x1d);
+ uint32_t x72 = (x70 & 0x1fffffff);
+ ℤ x73 = (x71 +ℤ x37);
+ uint64_t x74 = (x73 >> 0x1c);
+ uint32_t x75 = (x73 & 0xfffffff);
+ ℤ x76 = (x74 +ℤ x36);
+ uint64_t x77 = (x76 >> 0x1c);
+ uint32_t x78 = (x76 & 0xfffffff);
+ ℤ x79 = (x77 +ℤ x35);
+ uint64_t x80 = (x79 >> 0x1c);
+ uint32_t x81 = (x79 & 0xfffffff);
+ ℤ x82 = (x80 +ℤ x34);
+ uint64_t x83 = (x82 >> 0x1d);
+ uint32_t x84 = (x82 & 0x1fffffff);
+ ℤ x85 = (x83 +ℤ x33);
+ uint64_t x86 = (x85 >> 0x1c);
+ uint32_t x87 = (x85 & 0xfffffff);
+ ℤ x88 = (x86 +ℤ x32);
+ uint64_t x89 = (x88 >> 0x1c);
+ uint32_t x90 = (x88 & 0xfffffff);
+ ℤ x91 = (x89 +ℤ x31);
+ uint64_t x92 = (x91 >> 0x1c);
+ uint32_t x93 = (x91 & 0xfffffff);
+ uint64_t x94 = (x48 + (0x3 * x92));
+ uint32_t x95 = (uint32_t) (x94 >> 0x1d);
+ uint32_t x96 = ((uint32_t)x94 & 0x1fffffff);
+ uint32_t x97 = (x95 + x51);
+ uint32_t x98 = (x97 >> 0x1c);
+ uint32_t x99 = (x97 & 0xfffffff);
+ return (Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, Return x72, Return x69, Return x66, Return x63, Return x60, Return x57, (x98 + x54), Return x99, Return x96))
x
- : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e452m3/freeze.c b/src/Specific/solinas32_2e452m3/freeze.c
new file mode 100644
index 000000000..ddf7dd7c3
--- /dev/null
+++ b/src/Specific/solinas32_2e452m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint32_t x32;
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 29 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x1ffffffd;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e452m3/freeze.h b/src/Specific/solinas32_2e452m3/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e452m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e452m3/freezeDisplay.log b/src/Specific/solinas32_2e452m3/freezeDisplay.log
index e99be948a..07e0cb10a 100644
--- a/src/Specific/solinas32_2e452m3/freezeDisplay.log
+++ b/src/Specific/solinas32_2e452m3/freezeDisplay.log
@@ -1,62 +1,56 @@
-λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
Interp-η
(λ var : Syntax.base_type → Type,
- λ '(x33, x34, x32, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
- uint32_t x36, uint8_t x37 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x3fffffd);
- uint32_t x39, uint8_t x40 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x37, Return x4, 0x1ffffff);
- uint32_t x42, uint8_t x43 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x40, Return x6, 0x1ffffff);
- uint32_t x45, uint8_t x46 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x43, Return x8, 0x1ffffff);
- uint32_t x48, uint8_t x49 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x46, Return x10, 0x1ffffff);
- uint32_t x51, uint8_t x52 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x49, Return x12, 0x1ffffff);
- uint32_t x54, uint8_t x55 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x52, Return x14, 0x1ffffff);
- uint32_t x57, uint8_t x58 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x55, Return x16, 0x1ffffff);
- uint32_t x60, uint8_t x61 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x58, Return x18, 0x1ffffff);
- uint32_t x63, uint8_t x64 = Op (Syntax.SubWithGetBorrow 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x61, Return x20, 0x3ffffff);
- uint32_t x66, uint8_t x67 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x64, Return x22, 0x1ffffff);
- uint32_t x69, uint8_t x70 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x67, Return x24, 0x1ffffff);
- uint32_t x72, uint8_t x73 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x70, Return x26, 0x1ffffff);
- uint32_t x75, uint8_t x76 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x73, Return x28, 0x1ffffff);
- uint32_t x78, uint8_t x79 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x76, Return x30, 0x1ffffff);
- uint32_t x81, uint8_t x82 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x79, Return x32, 0x1ffffff);
- uint32_t x84, uint8_t x85 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x82, Return x34, 0x1ffffff);
- uint32_t x87, uint8_t x88 = Op (Syntax.SubWithGetBorrow 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x85, Return x33, 0x1ffffff);
- uint32_t x89 = (uint32_t)cmovznz(x88, 0x0, 0xffffffff);
- uint32_t x90 = x89 & 0x3fffffd;
- uint32_t x92, uint8_t x93 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x36, Return x90);
- uint32_t x94 = x89 & 0x1ffffff;
- uint32_t x96, uint8_t x97 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x93, Return x39, Return x94);
- uint32_t x98 = x89 & 0x1ffffff;
- uint32_t x100, uint8_t x101 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x97, Return x42, Return x98);
- uint32_t x102 = x89 & 0x1ffffff;
- uint32_t x104, uint8_t x105 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x101, Return x45, Return x102);
- uint32_t x106 = x89 & 0x1ffffff;
- uint32_t x108, uint8_t x109 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x105, Return x48, Return x106);
- uint32_t x110 = x89 & 0x1ffffff;
- uint32_t x112, uint8_t x113 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x109, Return x51, Return x110);
- uint32_t x114 = x89 & 0x1ffffff;
- uint32_t x116, uint8_t x117 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x113, Return x54, Return x114);
- uint32_t x118 = x89 & 0x1ffffff;
- uint32_t x120, uint8_t x121 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x117, Return x57, Return x118);
- uint32_t x122 = x89 & 0x1ffffff;
- uint32_t x124, uint8_t x125 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x121, Return x60, Return x122);
- uint32_t x126 = x89 & 0x3ffffff;
- uint32_t x128, uint8_t x129 = Op (Syntax.AddWithGetCarry 26 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x125, Return x63, Return x126);
- uint32_t x130 = x89 & 0x1ffffff;
- uint32_t x132, uint8_t x133 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x129, Return x66, Return x130);
- uint32_t x134 = x89 & 0x1ffffff;
- uint32_t x136, uint8_t x137 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x133, Return x69, Return x134);
- uint32_t x138 = x89 & 0x1ffffff;
- uint32_t x140, uint8_t x141 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x137, Return x72, Return x138);
- uint32_t x142 = x89 & 0x1ffffff;
- uint32_t x144, uint8_t x145 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x141, Return x75, Return x142);
- uint32_t x146 = x89 & 0x1ffffff;
- uint32_t x148, uint8_t x149 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x145, Return x78, Return x146);
- uint32_t x150 = x89 & 0x1ffffff;
- uint32_t x152, uint8_t x153 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x149, Return x81, Return x150);
- uint32_t x154 = x89 & 0x1ffffff;
- uint32_t x156, uint8_t x157 = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x153, Return x84, Return x154);
- uint32_t x158 = x89 & 0x1ffffff;
- uint32_t x160, uint8_t _ = Op (Syntax.AddWithGetCarry 25 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x157, Return x87, Return x158);
- (Return x160, Return x156, Return x152, Return x148, Return x144, Return x140, Return x136, Return x132, Return x128, Return x124, Return x120, Return x116, Return x112, Return x108, Return x104, Return x100, Return x96, Return x92))
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ uint32_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x2, 0x1ffffffd);
+ uint32_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x33, Return x4, 0xfffffff);
+ uint32_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x36, Return x6, 0xfffffff);
+ uint32_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x39, Return x8, 0xfffffff);
+ uint32_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x42, Return x10, 0x1fffffff);
+ uint32_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x45, Return x12, 0xfffffff);
+ uint32_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x48, Return x14, 0xfffffff);
+ uint32_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x51, Return x16, 0xfffffff);
+ uint32_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x54, Return x18, 0x1fffffff);
+ uint32_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x57, Return x20, 0xfffffff);
+ uint32_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x60, Return x22, 0xfffffff);
+ uint32_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x63, Return x24, 0xfffffff);
+ uint32_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x66, Return x26, 0x1fffffff);
+ uint32_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x69, Return x28, 0xfffffff);
+ uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0xfffffff);
+ uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0xfffffff);
+ uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
+ uint32_t x80 = (x79 & 0x1ffffffd);
+ uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ uint32_t x84 = (x79 & 0xfffffff);
+ uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ uint32_t x88 = (x79 & 0xfffffff);
+ uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ uint32_t x92 = (x79 & 0xfffffff);
+ uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ uint32_t x96 = (x79 & 0x1fffffff);
+ uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ uint32_t x100 = (x79 & 0xfffffff);
+ uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ uint32_t x104 = (x79 & 0xfffffff);
+ uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ uint32_t x108 = (x79 & 0xfffffff);
+ uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ uint32_t x112 = (x79 & 0x1fffffff);
+ uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ uint32_t x116 = (x79 & 0xfffffff);
+ uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ uint32_t x120 = (x79 & 0xfffffff);
+ uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ uint32_t x124 = (x79 & 0xfffffff);
+ uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ uint32_t x128 = (x79 & 0x1fffffff);
+ uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 29 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ uint32_t x132 = (x79 & 0xfffffff);
+ uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ uint32_t x136 = (x79 & 0xfffffff);
+ uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ uint32_t x140 = (x79 & 0xfffffff);
+ uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 28 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ (Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
x
- : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e480m2e240m1/femul.c b/src/Specific/solinas32_2e480m2e240m1/femul.c
new file mode 100644
index 000000000..2fda72b70
--- /dev/null
+++ b/src/Specific/solinas32_2e480m2e240m1/femul.c
@@ -0,0 +1,131 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35)
+{ ℤ x64 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x19 + x32) *ℤ ((uint64_t)x49 + x62)) -ℤ ((uint64_t)x19 * x49)), ((((uint64_t)x19 * x62) +ℤ ((uint64_t)x32 * x49)) +ℤ ((uint64_t)x32 * x62)));
+{ ℤ x65 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x17 + x33) *ℤ ((uint64_t)x49 + x62)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x47 + x63))) -ℤ (((uint64_t)x17 * x49) +ℤ ((uint64_t)x19 * x47))), (((((uint64_t)x17 * x62) +ℤ ((uint64_t)x19 * x63)) +ℤ (((uint64_t)x33 * x49) +ℤ ((uint64_t)x32 * x47))) +ℤ (((uint64_t)x33 * x62) +ℤ ((uint64_t)x32 * x63))));
+{ ℤ x66 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x15 + x31) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x47 + x63)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x45 + x61)))) -ℤ (((uint64_t)x15 * x49) +ℤ (((uint64_t)x17 * x47) +ℤ ((uint64_t)x19 * x45)))), (((((uint64_t)x15 * x62) +ℤ (((uint64_t)x17 * x63) +ℤ ((uint64_t)x19 * x61))) +ℤ (((uint64_t)x31 * x49) +ℤ (((uint64_t)x33 * x47) +ℤ ((uint64_t)x32 * x45)))) +ℤ (((uint64_t)x31 * x62) +ℤ (((uint64_t)x33 * x63) +ℤ ((uint64_t)x32 * x61)))));
+{ ℤ x67 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x13 + x29) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x45 + x61)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x43 + x59))))) -ℤ (((uint64_t)x13 * x49) +ℤ (((uint64_t)x15 * x47) +ℤ (((uint64_t)x17 * x45) +ℤ ((uint64_t)x19 * x43))))), (((((uint64_t)x13 * x62) +ℤ (((uint64_t)x15 * x63) +ℤ (((uint64_t)x17 * x61) +ℤ ((uint64_t)x19 * x59)))) +ℤ (((uint64_t)x29 * x49) +ℤ (((uint64_t)x31 * x47) +ℤ (((uint64_t)x33 * x45) +ℤ ((uint64_t)x32 * x43))))) +ℤ (((uint64_t)x29 * x62) +ℤ (((uint64_t)x31 * x63) +ℤ (((uint64_t)x33 * x61) +ℤ ((uint64_t)x32 * x59))))));
+{ ℤ x68 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x11 + x27) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x43 + x59)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x41 + x57)))))) -ℤ (((uint64_t)x11 * x49) +ℤ (((uint64_t)x13 * x47) +ℤ (((uint64_t)x15 * x45) +ℤ (((uint64_t)x17 * x43) +ℤ ((uint64_t)x19 * x41)))))), (((((uint64_t)x11 * x62) +ℤ (((uint64_t)x13 * x63) +ℤ (((uint64_t)x15 * x61) +ℤ (((uint64_t)x17 * x59) +ℤ ((uint64_t)x19 * x57))))) +ℤ (((uint64_t)x27 * x49) +ℤ (((uint64_t)x29 * x47) +ℤ (((uint64_t)x31 * x45) +ℤ (((uint64_t)x33 * x43) +ℤ ((uint64_t)x32 * x41)))))) +ℤ (((uint64_t)x27 * x62) +ℤ (((uint64_t)x29 * x63) +ℤ (((uint64_t)x31 * x61) +ℤ (((uint64_t)x33 * x59) +ℤ ((uint64_t)x32 * x57)))))));
+{ ℤ x69 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x9 + x25) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x41 + x57)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x39 + x55))))))) -ℤ (((uint64_t)x9 * x49) +ℤ (((uint64_t)x11 * x47) +ℤ (((uint64_t)x13 * x45) +ℤ (((uint64_t)x15 * x43) +ℤ (((uint64_t)x17 * x41) +ℤ ((uint64_t)x19 * x39))))))), (((((uint64_t)x9 * x62) +ℤ (((uint64_t)x11 * x63) +ℤ (((uint64_t)x13 * x61) +ℤ (((uint64_t)x15 * x59) +ℤ (((uint64_t)x17 * x57) +ℤ ((uint64_t)x19 * x55)))))) +ℤ (((uint64_t)x25 * x49) +ℤ (((uint64_t)x27 * x47) +ℤ (((uint64_t)x29 * x45) +ℤ (((uint64_t)x31 * x43) +ℤ (((uint64_t)x33 * x41) +ℤ ((uint64_t)x32 * x39))))))) +ℤ (((uint64_t)x25 * x62) +ℤ (((uint64_t)x27 * x63) +ℤ (((uint64_t)x29 * x61) +ℤ (((uint64_t)x31 * x59) +ℤ (((uint64_t)x33 * x57) +ℤ ((uint64_t)x32 * x55))))))));
+{ ℤ x70 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x7 + x23) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x39 + x55)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x37 + x53)))))))) -ℤ (((uint64_t)x7 * x49) +ℤ (((uint64_t)x9 * x47) +ℤ (((uint64_t)x11 * x45) +ℤ (((uint64_t)x13 * x43) +ℤ (((uint64_t)x15 * x41) +ℤ (((uint64_t)x17 * x39) +ℤ ((uint64_t)x19 * x37)))))))), (((((uint64_t)x7 * x62) +ℤ (((uint64_t)x9 * x63) +ℤ (((uint64_t)x11 * x61) +ℤ (((uint64_t)x13 * x59) +ℤ (((uint64_t)x15 * x57) +ℤ (((uint64_t)x17 * x55) +ℤ ((uint64_t)x19 * x53))))))) +ℤ (((uint64_t)x23 * x49) +ℤ (((uint64_t)x25 * x47) +ℤ (((uint64_t)x27 * x45) +ℤ (((uint64_t)x29 * x43) +ℤ (((uint64_t)x31 * x41) +ℤ (((uint64_t)x33 * x39) +ℤ ((uint64_t)x32 * x37)))))))) +ℤ (((uint64_t)x23 * x62) +ℤ (((uint64_t)x25 * x63) +ℤ (((uint64_t)x27 * x61) +ℤ (((uint64_t)x29 * x59) +ℤ (((uint64_t)x31 * x57) +ℤ (((uint64_t)x33 * x55) +ℤ ((uint64_t)x32 * x53)))))))));
+{ ℤ x71 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x35 + x51))))))))) -ℤ (((uint64_t)x5 * x49) +ℤ (((uint64_t)x7 * x47) +ℤ (((uint64_t)x9 * x45) +ℤ (((uint64_t)x11 * x43) +ℤ (((uint64_t)x13 * x41) +ℤ (((uint64_t)x15 * x39) +ℤ (((uint64_t)x17 * x37) +ℤ ((uint64_t)x19 * x35))))))))), (((((uint64_t)x5 * x62) +ℤ (((uint64_t)x7 * x63) +ℤ (((uint64_t)x9 * x61) +ℤ (((uint64_t)x11 * x59) +ℤ (((uint64_t)x13 * x57) +ℤ (((uint64_t)x15 * x55) +ℤ (((uint64_t)x17 * x53) +ℤ ((uint64_t)x19 * x51)))))))) +ℤ (((uint64_t)x21 * x49) +ℤ (((uint64_t)x23 * x47) +ℤ (((uint64_t)x25 * x45) +ℤ (((uint64_t)x27 * x43) +ℤ (((uint64_t)x29 * x41) +ℤ (((uint64_t)x31 * x39) +ℤ (((uint64_t)x33 * x37) +ℤ ((uint64_t)x32 * x35))))))))) +ℤ (((uint64_t)x21 * x62) +ℤ (((uint64_t)x23 * x63) +ℤ (((uint64_t)x25 * x61) +ℤ (((uint64_t)x27 * x59) +ℤ (((uint64_t)x29 * x57) +ℤ (((uint64_t)x31 * x55) +ℤ (((uint64_t)x33 * x53) +ℤ ((uint64_t)x32 * x51))))))))));
+{ ℤ x72 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x17 + x33) *ℤ ((uint64_t)x35 + x51)))))))) -ℤ (((uint64_t)x5 * x47) +ℤ (((uint64_t)x7 * x45) +ℤ (((uint64_t)x9 * x43) +ℤ (((uint64_t)x11 * x41) +ℤ (((uint64_t)x13 * x39) +ℤ (((uint64_t)x15 * x37) +ℤ ((uint64_t)x17 * x35)))))))), (((((uint64_t)x5 * x63) +ℤ (((uint64_t)x7 * x61) +ℤ (((uint64_t)x9 * x59) +ℤ (((uint64_t)x11 * x57) +ℤ (((uint64_t)x13 * x55) +ℤ (((uint64_t)x15 * x53) +ℤ ((uint64_t)x17 * x51))))))) +ℤ (((uint64_t)x21 * x47) +ℤ (((uint64_t)x23 * x45) +ℤ (((uint64_t)x25 * x43) +ℤ (((uint64_t)x27 * x41) +ℤ (((uint64_t)x29 * x39) +ℤ (((uint64_t)x31 * x37) +ℤ ((uint64_t)x33 * x35)))))))) +ℤ (((uint64_t)x21 * x63) +ℤ (((uint64_t)x23 * x61) +ℤ (((uint64_t)x25 * x59) +ℤ (((uint64_t)x27 * x57) +ℤ (((uint64_t)x29 * x55) +ℤ (((uint64_t)x31 * x53) +ℤ ((uint64_t)x33 * x51)))))))));
+{ ℤ x73 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x15 + x31) *ℤ ((uint64_t)x35 + x51))))))) -ℤ (((uint64_t)x5 * x45) +ℤ (((uint64_t)x7 * x43) +ℤ (((uint64_t)x9 * x41) +ℤ (((uint64_t)x11 * x39) +ℤ (((uint64_t)x13 * x37) +ℤ ((uint64_t)x15 * x35))))))), (((((uint64_t)x5 * x61) +ℤ (((uint64_t)x7 * x59) +ℤ (((uint64_t)x9 * x57) +ℤ (((uint64_t)x11 * x55) +ℤ (((uint64_t)x13 * x53) +ℤ ((uint64_t)x15 * x51)))))) +ℤ (((uint64_t)x21 * x45) +ℤ (((uint64_t)x23 * x43) +ℤ (((uint64_t)x25 * x41) +ℤ (((uint64_t)x27 * x39) +ℤ (((uint64_t)x29 * x37) +ℤ ((uint64_t)x31 * x35))))))) +ℤ (((uint64_t)x21 * x61) +ℤ (((uint64_t)x23 * x59) +ℤ (((uint64_t)x25 * x57) +ℤ (((uint64_t)x27 * x55) +ℤ (((uint64_t)x29 * x53) +ℤ ((uint64_t)x31 * x51))))))));
+{ ℤ x74 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x13 + x29) *ℤ ((uint64_t)x35 + x51)))))) -ℤ (((uint64_t)x5 * x43) +ℤ (((uint64_t)x7 * x41) +ℤ (((uint64_t)x9 * x39) +ℤ (((uint64_t)x11 * x37) +ℤ ((uint64_t)x13 * x35)))))), (((((uint64_t)x5 * x59) +ℤ (((uint64_t)x7 * x57) +ℤ (((uint64_t)x9 * x55) +ℤ (((uint64_t)x11 * x53) +ℤ ((uint64_t)x13 * x51))))) +ℤ (((uint64_t)x21 * x43) +ℤ (((uint64_t)x23 * x41) +ℤ (((uint64_t)x25 * x39) +ℤ (((uint64_t)x27 * x37) +ℤ ((uint64_t)x29 * x35)))))) +ℤ (((uint64_t)x21 * x59) +ℤ (((uint64_t)x23 * x57) +ℤ (((uint64_t)x25 * x55) +ℤ (((uint64_t)x27 * x53) +ℤ ((uint64_t)x29 * x51)))))));
+{ ℤ x75 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x11 + x27) *ℤ ((uint64_t)x35 + x51))))) -ℤ (((uint64_t)x5 * x41) +ℤ (((uint64_t)x7 * x39) +ℤ (((uint64_t)x9 * x37) +ℤ ((uint64_t)x11 * x35))))), (((((uint64_t)x5 * x57) +ℤ (((uint64_t)x7 * x55) +ℤ (((uint64_t)x9 * x53) +ℤ ((uint64_t)x11 * x51)))) +ℤ (((uint64_t)x21 * x41) +ℤ (((uint64_t)x23 * x39) +ℤ (((uint64_t)x25 * x37) +ℤ ((uint64_t)x27 * x35))))) +ℤ (((uint64_t)x21 * x57) +ℤ (((uint64_t)x23 * x55) +ℤ (((uint64_t)x25 * x53) +ℤ ((uint64_t)x27 * x51))))));
+{ ℤ x76 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x9 + x25) *ℤ ((uint64_t)x35 + x51)))) -ℤ (((uint64_t)x5 * x39) +ℤ (((uint64_t)x7 * x37) +ℤ ((uint64_t)x9 * x35)))), (((((uint64_t)x5 * x55) +ℤ (((uint64_t)x7 * x53) +ℤ ((uint64_t)x9 * x51))) +ℤ (((uint64_t)x21 * x39) +ℤ (((uint64_t)x23 * x37) +ℤ ((uint64_t)x25 * x35)))) +ℤ (((uint64_t)x21 * x55) +ℤ (((uint64_t)x23 * x53) +ℤ ((uint64_t)x25 * x51)))));
+{ ℤ x77 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x7 + x23) *ℤ ((uint64_t)x35 + x51))) -ℤ (((uint64_t)x5 * x37) +ℤ ((uint64_t)x7 * x35))), (((((uint64_t)x5 * x53) +ℤ ((uint64_t)x7 * x51)) +ℤ (((uint64_t)x21 * x37) +ℤ ((uint64_t)x23 * x35))) +ℤ (((uint64_t)x21 * x53) +ℤ ((uint64_t)x23 * x51))));
+{ ℤ x78 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x5 + x21) *ℤ ((uint64_t)x35 + x51)) -ℤ ((uint64_t)x5 * x35)), ((((uint64_t)x5 * x51) +ℤ ((uint64_t)x21 * x35)) +ℤ ((uint64_t)x21 * x51)));
+{ ℤ x79 = (((((uint64_t)x19 * x49) +ℤ ((uint64_t)x32 * x62)) +ℤ x72) +ℤ x64);
+{ ℤ x80 = ((((((uint64_t)x17 * x49) +ℤ ((uint64_t)x19 * x47)) +ℤ (((uint64_t)x33 * x62) +ℤ ((uint64_t)x32 * x63))) +ℤ x73) +ℤ x65);
+{ ℤ x81 = ((((((uint64_t)x15 * x49) +ℤ (((uint64_t)x17 * x47) +ℤ ((uint64_t)x19 * x45))) +ℤ (((uint64_t)x31 * x62) +ℤ (((uint64_t)x33 * x63) +ℤ ((uint64_t)x32 * x61)))) +ℤ x74) +ℤ x66);
+{ ℤ x82 = ((((((uint64_t)x13 * x49) +ℤ (((uint64_t)x15 * x47) +ℤ (((uint64_t)x17 * x45) +ℤ ((uint64_t)x19 * x43)))) +ℤ (((uint64_t)x29 * x62) +ℤ (((uint64_t)x31 * x63) +ℤ (((uint64_t)x33 * x61) +ℤ ((uint64_t)x32 * x59))))) +ℤ x75) +ℤ x67);
+{ ℤ x83 = ((((((uint64_t)x11 * x49) +ℤ (((uint64_t)x13 * x47) +ℤ (((uint64_t)x15 * x45) +ℤ (((uint64_t)x17 * x43) +ℤ ((uint64_t)x19 * x41))))) +ℤ (((uint64_t)x27 * x62) +ℤ (((uint64_t)x29 * x63) +ℤ (((uint64_t)x31 * x61) +ℤ (((uint64_t)x33 * x59) +ℤ ((uint64_t)x32 * x57)))))) +ℤ x76) +ℤ x68);
+{ ℤ x84 = ((((((uint64_t)x9 * x49) +ℤ (((uint64_t)x11 * x47) +ℤ (((uint64_t)x13 * x45) +ℤ (((uint64_t)x15 * x43) +ℤ (((uint64_t)x17 * x41) +ℤ ((uint64_t)x19 * x39)))))) +ℤ (((uint64_t)x25 * x62) +ℤ (((uint64_t)x27 * x63) +ℤ (((uint64_t)x29 * x61) +ℤ (((uint64_t)x31 * x59) +ℤ (((uint64_t)x33 * x57) +ℤ ((uint64_t)x32 * x55))))))) +ℤ x77) +ℤ x69);
+{ ℤ x85 = ((((((uint64_t)x7 * x49) +ℤ (((uint64_t)x9 * x47) +ℤ (((uint64_t)x11 * x45) +ℤ (((uint64_t)x13 * x43) +ℤ (((uint64_t)x15 * x41) +ℤ (((uint64_t)x17 * x39) +ℤ ((uint64_t)x19 * x37))))))) +ℤ (((uint64_t)x23 * x62) +ℤ (((uint64_t)x25 * x63) +ℤ (((uint64_t)x27 * x61) +ℤ (((uint64_t)x29 * x59) +ℤ (((uint64_t)x31 * x57) +ℤ (((uint64_t)x33 * x55) +ℤ ((uint64_t)x32 * x53)))))))) +ℤ x78) +ℤ x70);
+{ ℤ x86 = ((((uint64_t)x5 * x49) +ℤ (((uint64_t)x7 * x47) +ℤ (((uint64_t)x9 * x45) +ℤ (((uint64_t)x11 * x43) +ℤ (((uint64_t)x13 * x41) +ℤ (((uint64_t)x15 * x39) +ℤ (((uint64_t)x17 * x37) +ℤ ((uint64_t)x19 * x35)))))))) +ℤ (((uint64_t)x21 * x62) +ℤ (((uint64_t)x23 * x63) +ℤ (((uint64_t)x25 * x61) +ℤ (((uint64_t)x27 * x59) +ℤ (((uint64_t)x29 * x57) +ℤ (((uint64_t)x31 * x55) +ℤ (((uint64_t)x33 * x53) +ℤ ((uint64_t)x32 * x51)))))))));
+{ ℤ x87 = (((((uint64_t)x5 * x47) +ℤ (((uint64_t)x7 * x45) +ℤ (((uint64_t)x9 * x43) +ℤ (((uint64_t)x11 * x41) +ℤ (((uint64_t)x13 * x39) +ℤ (((uint64_t)x15 * x37) +ℤ ((uint64_t)x17 * x35))))))) +ℤ (((uint64_t)x21 * x63) +ℤ (((uint64_t)x23 * x61) +ℤ (((uint64_t)x25 * x59) +ℤ (((uint64_t)x27 * x57) +ℤ (((uint64_t)x29 * x55) +ℤ (((uint64_t)x31 * x53) +ℤ ((uint64_t)x33 * x51)))))))) +ℤ x64);
+{ ℤ x88 = (((((uint64_t)x5 * x45) +ℤ (((uint64_t)x7 * x43) +ℤ (((uint64_t)x9 * x41) +ℤ (((uint64_t)x11 * x39) +ℤ (((uint64_t)x13 * x37) +ℤ ((uint64_t)x15 * x35)))))) +ℤ (((uint64_t)x21 * x61) +ℤ (((uint64_t)x23 * x59) +ℤ (((uint64_t)x25 * x57) +ℤ (((uint64_t)x27 * x55) +ℤ (((uint64_t)x29 * x53) +ℤ ((uint64_t)x31 * x51))))))) +ℤ x65);
+{ ℤ x89 = (((((uint64_t)x5 * x43) +ℤ (((uint64_t)x7 * x41) +ℤ (((uint64_t)x9 * x39) +ℤ (((uint64_t)x11 * x37) +ℤ ((uint64_t)x13 * x35))))) +ℤ (((uint64_t)x21 * x59) +ℤ (((uint64_t)x23 * x57) +ℤ (((uint64_t)x25 * x55) +ℤ (((uint64_t)x27 * x53) +ℤ ((uint64_t)x29 * x51)))))) +ℤ x66);
+{ ℤ x90 = (((((uint64_t)x5 * x41) +ℤ (((uint64_t)x7 * x39) +ℤ (((uint64_t)x9 * x37) +ℤ ((uint64_t)x11 * x35)))) +ℤ (((uint64_t)x21 * x57) +ℤ (((uint64_t)x23 * x55) +ℤ (((uint64_t)x25 * x53) +ℤ ((uint64_t)x27 * x51))))) +ℤ x67);
+{ ℤ x91 = (((((uint64_t)x5 * x39) +ℤ (((uint64_t)x7 * x37) +ℤ ((uint64_t)x9 * x35))) +ℤ (((uint64_t)x21 * x55) +ℤ (((uint64_t)x23 * x53) +ℤ ((uint64_t)x25 * x51)))) +ℤ x68);
+{ ℤ x92 = (((((uint64_t)x5 * x37) +ℤ ((uint64_t)x7 * x35)) +ℤ (((uint64_t)x21 * x53) +ℤ ((uint64_t)x23 * x51))) +ℤ x69);
+{ ℤ x93 = ((((uint64_t)x5 * x35) +ℤ ((uint64_t)x21 * x51)) +ℤ x70);
+{ uint64_t x94 = (x86 >> 0x1e);
+{ uint32_t x95 = (x86 & 0x3fffffff);
+{ uint64_t x96 = (x71 >> 0x1e);
+{ uint32_t x97 = (x71 & 0x3fffffff);
+{ ℤ x98 = ((0x40000000 *ℤ x96) +ℤ x97);
+{ uint64_t x99 = (x98 >> 0x1e);
+{ uint32_t x100 = (x98 & 0x3fffffff);
+{ ℤ x101 = ((x94 +ℤ x85) +ℤ x99);
+{ uint64_t x102 = (x101 >> 0x1e);
+{ uint32_t x103 = (x101 & 0x3fffffff);
+{ ℤ x104 = (x93 +ℤ x99);
+{ uint64_t x105 = (x104 >> 0x1e);
+{ uint32_t x106 = (x104 & 0x3fffffff);
+{ ℤ x107 = (x102 +ℤ x84);
+{ uint64_t x108 = (x107 >> 0x1e);
+{ uint32_t x109 = (x107 & 0x3fffffff);
+{ ℤ x110 = (x105 +ℤ x92);
+{ uint64_t x111 = (x110 >> 0x1e);
+{ uint32_t x112 = (x110 & 0x3fffffff);
+{ ℤ x113 = (x108 +ℤ x83);
+{ uint64_t x114 = (x113 >> 0x1e);
+{ uint32_t x115 = (x113 & 0x3fffffff);
+{ ℤ x116 = (x111 +ℤ x91);
+{ uint64_t x117 = (x116 >> 0x1e);
+{ uint32_t x118 = (x116 & 0x3fffffff);
+{ ℤ x119 = (x114 +ℤ x82);
+{ uint64_t x120 = (x119 >> 0x1e);
+{ uint32_t x121 = (x119 & 0x3fffffff);
+{ ℤ x122 = (x117 +ℤ x90);
+{ uint64_t x123 = (x122 >> 0x1e);
+{ uint32_t x124 = (x122 & 0x3fffffff);
+{ ℤ x125 = (x120 +ℤ x81);
+{ uint64_t x126 = (x125 >> 0x1e);
+{ uint32_t x127 = (x125 & 0x3fffffff);
+{ ℤ x128 = (x123 +ℤ x89);
+{ uint64_t x129 = (x128 >> 0x1e);
+{ uint32_t x130 = (x128 & 0x3fffffff);
+{ ℤ x131 = (x126 +ℤ x80);
+{ uint64_t x132 = (x131 >> 0x1e);
+{ uint32_t x133 = (x131 & 0x3fffffff);
+{ ℤ x134 = (x129 +ℤ x88);
+{ uint64_t x135 = (x134 >> 0x1e);
+{ uint32_t x136 = (x134 & 0x3fffffff);
+{ ℤ x137 = (x132 +ℤ x79);
+{ uint64_t x138 = (x137 >> 0x1e);
+{ uint32_t x139 = (x137 & 0x3fffffff);
+{ ℤ x140 = (x135 +ℤ x87);
+{ uint64_t x141 = (x140 >> 0x1e);
+{ uint32_t x142 = (x140 & 0x3fffffff);
+{ uint64_t x143 = (x138 + x100);
+{ uint32_t x144 = (uint32_t) (x143 >> 0x1e);
+{ uint32_t x145 = ((uint32_t)x143 & 0x3fffffff);
+{ uint64_t x146 = (x141 + x95);
+{ uint32_t x147 = (uint32_t) (x146 >> 0x1e);
+{ uint32_t x148 = ((uint32_t)x146 & 0x3fffffff);
+{ uint64_t x149 = (((uint64_t)0x40000000 * x144) + x145);
+{ uint32_t x150 = (uint32_t) (x149 >> 0x1e);
+{ uint32_t x151 = ((uint32_t)x149 & 0x3fffffff);
+{ uint32_t x152 = ((x147 + x103) + x150);
+{ uint32_t x153 = (x152 >> 0x1e);
+{ uint32_t x154 = (x152 & 0x3fffffff);
+{ uint32_t x155 = (x106 + x150);
+{ uint32_t x156 = (x155 >> 0x1e);
+{ uint32_t x157 = (x155 & 0x3fffffff);
+out[0] = x151;
+out[1] = x139;
+out[2] = x133;
+out[3] = x127;
+out[4] = x121;
+out[5] = x115;
+out[6] = x153 + x109;
+out[7] = x154;
+out[8] = x148;
+out[9] = x142;
+out[10] = x136;
+out[11] = x130;
+out[12] = x124;
+out[13] = x118;
+out[14] = x156 + x112;
+out[15] = x157;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e480m2e240m1/femul.h b/src/Specific/solinas32_2e480m2e240m1/femul.h
new file mode 100644
index 000000000..c4089fc7d
--- /dev/null
+++ b/src/Specific/solinas32_2e480m2e240m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35);
diff --git a/src/Specific/solinas32_2e480m2e240m1/femulDisplay.log b/src/Specific/solinas32_2e480m2e240m1/femulDisplay.log
new file mode 100644
index 000000000..9bed1ebda
--- /dev/null
+++ b/src/Specific/solinas32_2e480m2e240m1/femulDisplay.log
@@ -0,0 +1,101 @@
+λ x x0 : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
+ ℤ x64 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x19 + x32) *ℤ ((uint64_t)x49 + x62)) -ℤ ((uint64_t)x19 * x49)), ((((uint64_t)x19 * x62) +ℤ ((uint64_t)x32 * x49)) +ℤ ((uint64_t)x32 * x62)));
+ ℤ x65 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x17 + x33) *ℤ ((uint64_t)x49 + x62)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x47 + x63))) -ℤ (((uint64_t)x17 * x49) +ℤ ((uint64_t)x19 * x47))), (((((uint64_t)x17 * x62) +ℤ ((uint64_t)x19 * x63)) +ℤ (((uint64_t)x33 * x49) +ℤ ((uint64_t)x32 * x47))) +ℤ (((uint64_t)x33 * x62) +ℤ ((uint64_t)x32 * x63))));
+ ℤ x66 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x15 + x31) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x47 + x63)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x45 + x61)))) -ℤ (((uint64_t)x15 * x49) +ℤ (((uint64_t)x17 * x47) +ℤ ((uint64_t)x19 * x45)))), (((((uint64_t)x15 * x62) +ℤ (((uint64_t)x17 * x63) +ℤ ((uint64_t)x19 * x61))) +ℤ (((uint64_t)x31 * x49) +ℤ (((uint64_t)x33 * x47) +ℤ ((uint64_t)x32 * x45)))) +ℤ (((uint64_t)x31 * x62) +ℤ (((uint64_t)x33 * x63) +ℤ ((uint64_t)x32 * x61)))));
+ ℤ x67 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x13 + x29) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x45 + x61)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x43 + x59))))) -ℤ (((uint64_t)x13 * x49) +ℤ (((uint64_t)x15 * x47) +ℤ (((uint64_t)x17 * x45) +ℤ ((uint64_t)x19 * x43))))), (((((uint64_t)x13 * x62) +ℤ (((uint64_t)x15 * x63) +ℤ (((uint64_t)x17 * x61) +ℤ ((uint64_t)x19 * x59)))) +ℤ (((uint64_t)x29 * x49) +ℤ (((uint64_t)x31 * x47) +ℤ (((uint64_t)x33 * x45) +ℤ ((uint64_t)x32 * x43))))) +ℤ (((uint64_t)x29 * x62) +ℤ (((uint64_t)x31 * x63) +ℤ (((uint64_t)x33 * x61) +ℤ ((uint64_t)x32 * x59))))));
+ ℤ x68 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x11 + x27) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x43 + x59)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x41 + x57)))))) -ℤ (((uint64_t)x11 * x49) +ℤ (((uint64_t)x13 * x47) +ℤ (((uint64_t)x15 * x45) +ℤ (((uint64_t)x17 * x43) +ℤ ((uint64_t)x19 * x41)))))), (((((uint64_t)x11 * x62) +ℤ (((uint64_t)x13 * x63) +ℤ (((uint64_t)x15 * x61) +ℤ (((uint64_t)x17 * x59) +ℤ ((uint64_t)x19 * x57))))) +ℤ (((uint64_t)x27 * x49) +ℤ (((uint64_t)x29 * x47) +ℤ (((uint64_t)x31 * x45) +ℤ (((uint64_t)x33 * x43) +ℤ ((uint64_t)x32 * x41)))))) +ℤ (((uint64_t)x27 * x62) +ℤ (((uint64_t)x29 * x63) +ℤ (((uint64_t)x31 * x61) +ℤ (((uint64_t)x33 * x59) +ℤ ((uint64_t)x32 * x57)))))));
+ ℤ x69 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x9 + x25) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x41 + x57)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x39 + x55))))))) -ℤ (((uint64_t)x9 * x49) +ℤ (((uint64_t)x11 * x47) +ℤ (((uint64_t)x13 * x45) +ℤ (((uint64_t)x15 * x43) +ℤ (((uint64_t)x17 * x41) +ℤ ((uint64_t)x19 * x39))))))), (((((uint64_t)x9 * x62) +ℤ (((uint64_t)x11 * x63) +ℤ (((uint64_t)x13 * x61) +ℤ (((uint64_t)x15 * x59) +ℤ (((uint64_t)x17 * x57) +ℤ ((uint64_t)x19 * x55)))))) +ℤ (((uint64_t)x25 * x49) +ℤ (((uint64_t)x27 * x47) +ℤ (((uint64_t)x29 * x45) +ℤ (((uint64_t)x31 * x43) +ℤ (((uint64_t)x33 * x41) +ℤ ((uint64_t)x32 * x39))))))) +ℤ (((uint64_t)x25 * x62) +ℤ (((uint64_t)x27 * x63) +ℤ (((uint64_t)x29 * x61) +ℤ (((uint64_t)x31 * x59) +ℤ (((uint64_t)x33 * x57) +ℤ ((uint64_t)x32 * x55))))))));
+ ℤ x70 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x7 + x23) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x39 + x55)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x37 + x53)))))))) -ℤ (((uint64_t)x7 * x49) +ℤ (((uint64_t)x9 * x47) +ℤ (((uint64_t)x11 * x45) +ℤ (((uint64_t)x13 * x43) +ℤ (((uint64_t)x15 * x41) +ℤ (((uint64_t)x17 * x39) +ℤ ((uint64_t)x19 * x37)))))))), (((((uint64_t)x7 * x62) +ℤ (((uint64_t)x9 * x63) +ℤ (((uint64_t)x11 * x61) +ℤ (((uint64_t)x13 * x59) +ℤ (((uint64_t)x15 * x57) +ℤ (((uint64_t)x17 * x55) +ℤ ((uint64_t)x19 * x53))))))) +ℤ (((uint64_t)x23 * x49) +ℤ (((uint64_t)x25 * x47) +ℤ (((uint64_t)x27 * x45) +ℤ (((uint64_t)x29 * x43) +ℤ (((uint64_t)x31 * x41) +ℤ (((uint64_t)x33 * x39) +ℤ ((uint64_t)x32 * x37)))))))) +ℤ (((uint64_t)x23 * x62) +ℤ (((uint64_t)x25 * x63) +ℤ (((uint64_t)x27 * x61) +ℤ (((uint64_t)x29 * x59) +ℤ (((uint64_t)x31 * x57) +ℤ (((uint64_t)x33 * x55) +ℤ ((uint64_t)x32 * x53)))))))));
+ ℤ x71 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x49 + x62)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x17 + x33) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x19 + x32) *ℤ ((uint64_t)x35 + x51))))))))) -ℤ (((uint64_t)x5 * x49) +ℤ (((uint64_t)x7 * x47) +ℤ (((uint64_t)x9 * x45) +ℤ (((uint64_t)x11 * x43) +ℤ (((uint64_t)x13 * x41) +ℤ (((uint64_t)x15 * x39) +ℤ (((uint64_t)x17 * x37) +ℤ ((uint64_t)x19 * x35))))))))), (((((uint64_t)x5 * x62) +ℤ (((uint64_t)x7 * x63) +ℤ (((uint64_t)x9 * x61) +ℤ (((uint64_t)x11 * x59) +ℤ (((uint64_t)x13 * x57) +ℤ (((uint64_t)x15 * x55) +ℤ (((uint64_t)x17 * x53) +ℤ ((uint64_t)x19 * x51)))))))) +ℤ (((uint64_t)x21 * x49) +ℤ (((uint64_t)x23 * x47) +ℤ (((uint64_t)x25 * x45) +ℤ (((uint64_t)x27 * x43) +ℤ (((uint64_t)x29 * x41) +ℤ (((uint64_t)x31 * x39) +ℤ (((uint64_t)x33 * x37) +ℤ ((uint64_t)x32 * x35))))))))) +ℤ (((uint64_t)x21 * x62) +ℤ (((uint64_t)x23 * x63) +ℤ (((uint64_t)x25 * x61) +ℤ (((uint64_t)x27 * x59) +ℤ (((uint64_t)x29 * x57) +ℤ (((uint64_t)x31 * x55) +ℤ (((uint64_t)x33 * x53) +ℤ ((uint64_t)x32 * x51))))))))));
+ ℤ x72 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x47 + x63)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x15 + x31) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x17 + x33) *ℤ ((uint64_t)x35 + x51)))))))) -ℤ (((uint64_t)x5 * x47) +ℤ (((uint64_t)x7 * x45) +ℤ (((uint64_t)x9 * x43) +ℤ (((uint64_t)x11 * x41) +ℤ (((uint64_t)x13 * x39) +ℤ (((uint64_t)x15 * x37) +ℤ ((uint64_t)x17 * x35)))))))), (((((uint64_t)x5 * x63) +ℤ (((uint64_t)x7 * x61) +ℤ (((uint64_t)x9 * x59) +ℤ (((uint64_t)x11 * x57) +ℤ (((uint64_t)x13 * x55) +ℤ (((uint64_t)x15 * x53) +ℤ ((uint64_t)x17 * x51))))))) +ℤ (((uint64_t)x21 * x47) +ℤ (((uint64_t)x23 * x45) +ℤ (((uint64_t)x25 * x43) +ℤ (((uint64_t)x27 * x41) +ℤ (((uint64_t)x29 * x39) +ℤ (((uint64_t)x31 * x37) +ℤ ((uint64_t)x33 * x35)))))))) +ℤ (((uint64_t)x21 * x63) +ℤ (((uint64_t)x23 * x61) +ℤ (((uint64_t)x25 * x59) +ℤ (((uint64_t)x27 * x57) +ℤ (((uint64_t)x29 * x55) +ℤ (((uint64_t)x31 * x53) +ℤ ((uint64_t)x33 * x51)))))))));
+ ℤ x73 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x45 + x61)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x13 + x29) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x15 + x31) *ℤ ((uint64_t)x35 + x51))))))) -ℤ (((uint64_t)x5 * x45) +ℤ (((uint64_t)x7 * x43) +ℤ (((uint64_t)x9 * x41) +ℤ (((uint64_t)x11 * x39) +ℤ (((uint64_t)x13 * x37) +ℤ ((uint64_t)x15 * x35))))))), (((((uint64_t)x5 * x61) +ℤ (((uint64_t)x7 * x59) +ℤ (((uint64_t)x9 * x57) +ℤ (((uint64_t)x11 * x55) +ℤ (((uint64_t)x13 * x53) +ℤ ((uint64_t)x15 * x51)))))) +ℤ (((uint64_t)x21 * x45) +ℤ (((uint64_t)x23 * x43) +ℤ (((uint64_t)x25 * x41) +ℤ (((uint64_t)x27 * x39) +ℤ (((uint64_t)x29 * x37) +ℤ ((uint64_t)x31 * x35))))))) +ℤ (((uint64_t)x21 * x61) +ℤ (((uint64_t)x23 * x59) +ℤ (((uint64_t)x25 * x57) +ℤ (((uint64_t)x27 * x55) +ℤ (((uint64_t)x29 * x53) +ℤ ((uint64_t)x31 * x51))))))));
+ ℤ x74 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x43 + x59)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x11 + x27) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x13 + x29) *ℤ ((uint64_t)x35 + x51)))))) -ℤ (((uint64_t)x5 * x43) +ℤ (((uint64_t)x7 * x41) +ℤ (((uint64_t)x9 * x39) +ℤ (((uint64_t)x11 * x37) +ℤ ((uint64_t)x13 * x35)))))), (((((uint64_t)x5 * x59) +ℤ (((uint64_t)x7 * x57) +ℤ (((uint64_t)x9 * x55) +ℤ (((uint64_t)x11 * x53) +ℤ ((uint64_t)x13 * x51))))) +ℤ (((uint64_t)x21 * x43) +ℤ (((uint64_t)x23 * x41) +ℤ (((uint64_t)x25 * x39) +ℤ (((uint64_t)x27 * x37) +ℤ ((uint64_t)x29 * x35)))))) +ℤ (((uint64_t)x21 * x59) +ℤ (((uint64_t)x23 * x57) +ℤ (((uint64_t)x25 * x55) +ℤ (((uint64_t)x27 * x53) +ℤ ((uint64_t)x29 * x51)))))));
+ ℤ x75 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x41 + x57)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x9 + x25) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x11 + x27) *ℤ ((uint64_t)x35 + x51))))) -ℤ (((uint64_t)x5 * x41) +ℤ (((uint64_t)x7 * x39) +ℤ (((uint64_t)x9 * x37) +ℤ ((uint64_t)x11 * x35))))), (((((uint64_t)x5 * x57) +ℤ (((uint64_t)x7 * x55) +ℤ (((uint64_t)x9 * x53) +ℤ ((uint64_t)x11 * x51)))) +ℤ (((uint64_t)x21 * x41) +ℤ (((uint64_t)x23 * x39) +ℤ (((uint64_t)x25 * x37) +ℤ ((uint64_t)x27 * x35))))) +ℤ (((uint64_t)x21 * x57) +ℤ (((uint64_t)x23 * x55) +ℤ (((uint64_t)x25 * x53) +ℤ ((uint64_t)x27 * x51))))));
+ ℤ x76 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x39 + x55)) +ℤ ((((uint64_t)x7 + x23) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x9 + x25) *ℤ ((uint64_t)x35 + x51)))) -ℤ (((uint64_t)x5 * x39) +ℤ (((uint64_t)x7 * x37) +ℤ ((uint64_t)x9 * x35)))), (((((uint64_t)x5 * x55) +ℤ (((uint64_t)x7 * x53) +ℤ ((uint64_t)x9 * x51))) +ℤ (((uint64_t)x21 * x39) +ℤ (((uint64_t)x23 * x37) +ℤ ((uint64_t)x25 * x35)))) +ℤ (((uint64_t)x21 * x55) +ℤ (((uint64_t)x23 * x53) +ℤ ((uint64_t)x25 * x51)))));
+ ℤ x77 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x5 + x21) *ℤ ((uint64_t)x37 + x53)) +ℤ (((uint64_t)x7 + x23) *ℤ ((uint64_t)x35 + x51))) -ℤ (((uint64_t)x5 * x37) +ℤ ((uint64_t)x7 * x35))), (((((uint64_t)x5 * x53) +ℤ ((uint64_t)x7 * x51)) +ℤ (((uint64_t)x21 * x37) +ℤ ((uint64_t)x23 * x35))) +ℤ (((uint64_t)x21 * x53) +ℤ ((uint64_t)x23 * x51))));
+ ℤ x78 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x5 + x21) *ℤ ((uint64_t)x35 + x51)) -ℤ ((uint64_t)x5 * x35)), ((((uint64_t)x5 * x51) +ℤ ((uint64_t)x21 * x35)) +ℤ ((uint64_t)x21 * x51)));
+ ℤ x79 = (((((uint64_t)x19 * x49) +ℤ ((uint64_t)x32 * x62)) +ℤ x72) +ℤ x64);
+ ℤ x80 = ((((((uint64_t)x17 * x49) +ℤ ((uint64_t)x19 * x47)) +ℤ (((uint64_t)x33 * x62) +ℤ ((uint64_t)x32 * x63))) +ℤ x73) +ℤ x65);
+ ℤ x81 = ((((((uint64_t)x15 * x49) +ℤ (((uint64_t)x17 * x47) +ℤ ((uint64_t)x19 * x45))) +ℤ (((uint64_t)x31 * x62) +ℤ (((uint64_t)x33 * x63) +ℤ ((uint64_t)x32 * x61)))) +ℤ x74) +ℤ x66);
+ ℤ x82 = ((((((uint64_t)x13 * x49) +ℤ (((uint64_t)x15 * x47) +ℤ (((uint64_t)x17 * x45) +ℤ ((uint64_t)x19 * x43)))) +ℤ (((uint64_t)x29 * x62) +ℤ (((uint64_t)x31 * x63) +ℤ (((uint64_t)x33 * x61) +ℤ ((uint64_t)x32 * x59))))) +ℤ x75) +ℤ x67);
+ ℤ x83 = ((((((uint64_t)x11 * x49) +ℤ (((uint64_t)x13 * x47) +ℤ (((uint64_t)x15 * x45) +ℤ (((uint64_t)x17 * x43) +ℤ ((uint64_t)x19 * x41))))) +ℤ (((uint64_t)x27 * x62) +ℤ (((uint64_t)x29 * x63) +ℤ (((uint64_t)x31 * x61) +ℤ (((uint64_t)x33 * x59) +ℤ ((uint64_t)x32 * x57)))))) +ℤ x76) +ℤ x68);
+ ℤ x84 = ((((((uint64_t)x9 * x49) +ℤ (((uint64_t)x11 * x47) +ℤ (((uint64_t)x13 * x45) +ℤ (((uint64_t)x15 * x43) +ℤ (((uint64_t)x17 * x41) +ℤ ((uint64_t)x19 * x39)))))) +ℤ (((uint64_t)x25 * x62) +ℤ (((uint64_t)x27 * x63) +ℤ (((uint64_t)x29 * x61) +ℤ (((uint64_t)x31 * x59) +ℤ (((uint64_t)x33 * x57) +ℤ ((uint64_t)x32 * x55))))))) +ℤ x77) +ℤ x69);
+ ℤ x85 = ((((((uint64_t)x7 * x49) +ℤ (((uint64_t)x9 * x47) +ℤ (((uint64_t)x11 * x45) +ℤ (((uint64_t)x13 * x43) +ℤ (((uint64_t)x15 * x41) +ℤ (((uint64_t)x17 * x39) +ℤ ((uint64_t)x19 * x37))))))) +ℤ (((uint64_t)x23 * x62) +ℤ (((uint64_t)x25 * x63) +ℤ (((uint64_t)x27 * x61) +ℤ (((uint64_t)x29 * x59) +ℤ (((uint64_t)x31 * x57) +ℤ (((uint64_t)x33 * x55) +ℤ ((uint64_t)x32 * x53)))))))) +ℤ x78) +ℤ x70);
+ ℤ x86 = ((((uint64_t)x5 * x49) +ℤ (((uint64_t)x7 * x47) +ℤ (((uint64_t)x9 * x45) +ℤ (((uint64_t)x11 * x43) +ℤ (((uint64_t)x13 * x41) +ℤ (((uint64_t)x15 * x39) +ℤ (((uint64_t)x17 * x37) +ℤ ((uint64_t)x19 * x35)))))))) +ℤ (((uint64_t)x21 * x62) +ℤ (((uint64_t)x23 * x63) +ℤ (((uint64_t)x25 * x61) +ℤ (((uint64_t)x27 * x59) +ℤ (((uint64_t)x29 * x57) +ℤ (((uint64_t)x31 * x55) +ℤ (((uint64_t)x33 * x53) +ℤ ((uint64_t)x32 * x51)))))))));
+ ℤ x87 = (((((uint64_t)x5 * x47) +ℤ (((uint64_t)x7 * x45) +ℤ (((uint64_t)x9 * x43) +ℤ (((uint64_t)x11 * x41) +ℤ (((uint64_t)x13 * x39) +ℤ (((uint64_t)x15 * x37) +ℤ ((uint64_t)x17 * x35))))))) +ℤ (((uint64_t)x21 * x63) +ℤ (((uint64_t)x23 * x61) +ℤ (((uint64_t)x25 * x59) +ℤ (((uint64_t)x27 * x57) +ℤ (((uint64_t)x29 * x55) +ℤ (((uint64_t)x31 * x53) +ℤ ((uint64_t)x33 * x51)))))))) +ℤ x64);
+ ℤ x88 = (((((uint64_t)x5 * x45) +ℤ (((uint64_t)x7 * x43) +ℤ (((uint64_t)x9 * x41) +ℤ (((uint64_t)x11 * x39) +ℤ (((uint64_t)x13 * x37) +ℤ ((uint64_t)x15 * x35)))))) +ℤ (((uint64_t)x21 * x61) +ℤ (((uint64_t)x23 * x59) +ℤ (((uint64_t)x25 * x57) +ℤ (((uint64_t)x27 * x55) +ℤ (((uint64_t)x29 * x53) +ℤ ((uint64_t)x31 * x51))))))) +ℤ x65);
+ ℤ x89 = (((((uint64_t)x5 * x43) +ℤ (((uint64_t)x7 * x41) +ℤ (((uint64_t)x9 * x39) +ℤ (((uint64_t)x11 * x37) +ℤ ((uint64_t)x13 * x35))))) +ℤ (((uint64_t)x21 * x59) +ℤ (((uint64_t)x23 * x57) +ℤ (((uint64_t)x25 * x55) +ℤ (((uint64_t)x27 * x53) +ℤ ((uint64_t)x29 * x51)))))) +ℤ x66);
+ ℤ x90 = (((((uint64_t)x5 * x41) +ℤ (((uint64_t)x7 * x39) +ℤ (((uint64_t)x9 * x37) +ℤ ((uint64_t)x11 * x35)))) +ℤ (((uint64_t)x21 * x57) +ℤ (((uint64_t)x23 * x55) +ℤ (((uint64_t)x25 * x53) +ℤ ((uint64_t)x27 * x51))))) +ℤ x67);
+ ℤ x91 = (((((uint64_t)x5 * x39) +ℤ (((uint64_t)x7 * x37) +ℤ ((uint64_t)x9 * x35))) +ℤ (((uint64_t)x21 * x55) +ℤ (((uint64_t)x23 * x53) +ℤ ((uint64_t)x25 * x51)))) +ℤ x68);
+ ℤ x92 = (((((uint64_t)x5 * x37) +ℤ ((uint64_t)x7 * x35)) +ℤ (((uint64_t)x21 * x53) +ℤ ((uint64_t)x23 * x51))) +ℤ x69);
+ ℤ x93 = ((((uint64_t)x5 * x35) +ℤ ((uint64_t)x21 * x51)) +ℤ x70);
+ uint64_t x94 = (x86 >> 0x1e);
+ uint32_t x95 = (x86 & 0x3fffffff);
+ uint64_t x96 = (x71 >> 0x1e);
+ uint32_t x97 = (x71 & 0x3fffffff);
+ ℤ x98 = ((0x40000000 *ℤ x96) +ℤ x97);
+ uint64_t x99 = (x98 >> 0x1e);
+ uint32_t x100 = (x98 & 0x3fffffff);
+ ℤ x101 = ((x94 +ℤ x85) +ℤ x99);
+ uint64_t x102 = (x101 >> 0x1e);
+ uint32_t x103 = (x101 & 0x3fffffff);
+ ℤ x104 = (x93 +ℤ x99);
+ uint64_t x105 = (x104 >> 0x1e);
+ uint32_t x106 = (x104 & 0x3fffffff);
+ ℤ x107 = (x102 +ℤ x84);
+ uint64_t x108 = (x107 >> 0x1e);
+ uint32_t x109 = (x107 & 0x3fffffff);
+ ℤ x110 = (x105 +ℤ x92);
+ uint64_t x111 = (x110 >> 0x1e);
+ uint32_t x112 = (x110 & 0x3fffffff);
+ ℤ x113 = (x108 +ℤ x83);
+ uint64_t x114 = (x113 >> 0x1e);
+ uint32_t x115 = (x113 & 0x3fffffff);
+ ℤ x116 = (x111 +ℤ x91);
+ uint64_t x117 = (x116 >> 0x1e);
+ uint32_t x118 = (x116 & 0x3fffffff);
+ ℤ x119 = (x114 +ℤ x82);
+ uint64_t x120 = (x119 >> 0x1e);
+ uint32_t x121 = (x119 & 0x3fffffff);
+ ℤ x122 = (x117 +ℤ x90);
+ uint64_t x123 = (x122 >> 0x1e);
+ uint32_t x124 = (x122 & 0x3fffffff);
+ ℤ x125 = (x120 +ℤ x81);
+ uint64_t x126 = (x125 >> 0x1e);
+ uint32_t x127 = (x125 & 0x3fffffff);
+ ℤ x128 = (x123 +ℤ x89);
+ uint64_t x129 = (x128 >> 0x1e);
+ uint32_t x130 = (x128 & 0x3fffffff);
+ ℤ x131 = (x126 +ℤ x80);
+ uint64_t x132 = (x131 >> 0x1e);
+ uint32_t x133 = (x131 & 0x3fffffff);
+ ℤ x134 = (x129 +ℤ x88);
+ uint64_t x135 = (x134 >> 0x1e);
+ uint32_t x136 = (x134 & 0x3fffffff);
+ ℤ x137 = (x132 +ℤ x79);
+ uint64_t x138 = (x137 >> 0x1e);
+ uint32_t x139 = (x137 & 0x3fffffff);
+ ℤ x140 = (x135 +ℤ x87);
+ uint64_t x141 = (x140 >> 0x1e);
+ uint32_t x142 = (x140 & 0x3fffffff);
+ uint64_t x143 = (x138 + x100);
+ uint32_t x144 = (uint32_t) (x143 >> 0x1e);
+ uint32_t x145 = ((uint32_t)x143 & 0x3fffffff);
+ uint64_t x146 = (x141 + x95);
+ uint32_t x147 = (uint32_t) (x146 >> 0x1e);
+ uint32_t x148 = ((uint32_t)x146 & 0x3fffffff);
+ uint64_t x149 = (((uint64_t)0x40000000 * x144) + x145);
+ uint32_t x150 = (uint32_t) (x149 >> 0x1e);
+ uint32_t x151 = ((uint32_t)x149 & 0x3fffffff);
+ uint32_t x152 = ((x147 + x103) + x150);
+ uint32_t x153 = (x152 >> 0x1e);
+ uint32_t x154 = (x152 & 0x3fffffff);
+ uint32_t x155 = (x106 + x150);
+ uint32_t x156 = (x155 >> 0x1e);
+ uint32_t x157 = (x155 & 0x3fffffff);
+ return (Return x151, Return x139, Return x133, Return x127, Return x121, Return x115, (x153 + x109), Return x154, Return x148, Return x142, Return x136, Return x130, Return x124, Return x118, (x156 + x112), Return x157))
+(x, x0)%core
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e480m2e240m1/fesquare.c b/src/Specific/solinas32_2e480m2e240m1/fesquare.c
new file mode 100644
index 000000000..015bd9449
--- /dev/null
+++ b/src/Specific/solinas32_2e480m2e240m1/fesquare.c
@@ -0,0 +1,131 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ ℤ x31 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x16 + x29) *ℤ ((uint64_t)x16 + x29)) -ℤ ((uint64_t)x16 * x16)), ((((uint64_t)x16 * x29) +ℤ ((uint64_t)x29 * x16)) +ℤ ((uint64_t)x29 * x29)));
+{ ℤ x32 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x14 + x30) *ℤ ((uint64_t)x16 + x29)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x14 + x30))) -ℤ (((uint64_t)x14 * x16) +ℤ ((uint64_t)x16 * x14))), (((((uint64_t)x14 * x29) +ℤ ((uint64_t)x16 * x30)) +ℤ (((uint64_t)x30 * x16) +ℤ ((uint64_t)x29 * x14))) +ℤ (((uint64_t)x30 * x29) +ℤ ((uint64_t)x29 * x30))));
+{ ℤ x33 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x12 + x28) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x14 + x30)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x12 + x28)))) -ℤ (((uint64_t)x12 * x16) +ℤ (((uint64_t)x14 * x14) +ℤ ((uint64_t)x16 * x12)))), (((((uint64_t)x12 * x29) +ℤ (((uint64_t)x14 * x30) +ℤ ((uint64_t)x16 * x28))) +ℤ (((uint64_t)x28 * x16) +ℤ (((uint64_t)x30 * x14) +ℤ ((uint64_t)x29 * x12)))) +ℤ (((uint64_t)x28 * x29) +ℤ (((uint64_t)x30 * x30) +ℤ ((uint64_t)x29 * x28)))));
+{ ℤ x34 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x10 + x26) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x12 + x28)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x10 + x26))))) -ℤ (((uint64_t)x10 * x16) +ℤ (((uint64_t)x12 * x14) +ℤ (((uint64_t)x14 * x12) +ℤ ((uint64_t)x16 * x10))))), (((((uint64_t)x10 * x29) +ℤ (((uint64_t)x12 * x30) +ℤ (((uint64_t)x14 * x28) +ℤ ((uint64_t)x16 * x26)))) +ℤ (((uint64_t)x26 * x16) +ℤ (((uint64_t)x28 * x14) +ℤ (((uint64_t)x30 * x12) +ℤ ((uint64_t)x29 * x10))))) +ℤ (((uint64_t)x26 * x29) +ℤ (((uint64_t)x28 * x30) +ℤ (((uint64_t)x30 * x28) +ℤ ((uint64_t)x29 * x26))))));
+{ ℤ x35 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x8 + x24) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x10 + x26)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x8 + x24)))))) -ℤ (((uint64_t)x8 * x16) +ℤ (((uint64_t)x10 * x14) +ℤ (((uint64_t)x12 * x12) +ℤ (((uint64_t)x14 * x10) +ℤ ((uint64_t)x16 * x8)))))), (((((uint64_t)x8 * x29) +ℤ (((uint64_t)x10 * x30) +ℤ (((uint64_t)x12 * x28) +ℤ (((uint64_t)x14 * x26) +ℤ ((uint64_t)x16 * x24))))) +ℤ (((uint64_t)x24 * x16) +ℤ (((uint64_t)x26 * x14) +ℤ (((uint64_t)x28 * x12) +ℤ (((uint64_t)x30 * x10) +ℤ ((uint64_t)x29 * x8)))))) +ℤ (((uint64_t)x24 * x29) +ℤ (((uint64_t)x26 * x30) +ℤ (((uint64_t)x28 * x28) +ℤ (((uint64_t)x30 * x26) +ℤ ((uint64_t)x29 * x24)))))));
+{ ℤ x36 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x6 + x22) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x8 + x24)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x6 + x22))))))) -ℤ (((uint64_t)x6 * x16) +ℤ (((uint64_t)x8 * x14) +ℤ (((uint64_t)x10 * x12) +ℤ (((uint64_t)x12 * x10) +ℤ (((uint64_t)x14 * x8) +ℤ ((uint64_t)x16 * x6))))))), (((((uint64_t)x6 * x29) +ℤ (((uint64_t)x8 * x30) +ℤ (((uint64_t)x10 * x28) +ℤ (((uint64_t)x12 * x26) +ℤ (((uint64_t)x14 * x24) +ℤ ((uint64_t)x16 * x22)))))) +ℤ (((uint64_t)x22 * x16) +ℤ (((uint64_t)x24 * x14) +ℤ (((uint64_t)x26 * x12) +ℤ (((uint64_t)x28 * x10) +ℤ (((uint64_t)x30 * x8) +ℤ ((uint64_t)x29 * x6))))))) +ℤ (((uint64_t)x22 * x29) +ℤ (((uint64_t)x24 * x30) +ℤ (((uint64_t)x26 * x28) +ℤ (((uint64_t)x28 * x26) +ℤ (((uint64_t)x30 * x24) +ℤ ((uint64_t)x29 * x22))))))));
+{ ℤ x37 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x4 + x20) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x6 + x22)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x4 + x20)))))))) -ℤ (((uint64_t)x4 * x16) +ℤ (((uint64_t)x6 * x14) +ℤ (((uint64_t)x8 * x12) +ℤ (((uint64_t)x10 * x10) +ℤ (((uint64_t)x12 * x8) +ℤ (((uint64_t)x14 * x6) +ℤ ((uint64_t)x16 * x4)))))))), (((((uint64_t)x4 * x29) +ℤ (((uint64_t)x6 * x30) +ℤ (((uint64_t)x8 * x28) +ℤ (((uint64_t)x10 * x26) +ℤ (((uint64_t)x12 * x24) +ℤ (((uint64_t)x14 * x22) +ℤ ((uint64_t)x16 * x20))))))) +ℤ (((uint64_t)x20 * x16) +ℤ (((uint64_t)x22 * x14) +ℤ (((uint64_t)x24 * x12) +ℤ (((uint64_t)x26 * x10) +ℤ (((uint64_t)x28 * x8) +ℤ (((uint64_t)x30 * x6) +ℤ ((uint64_t)x29 * x4)))))))) +ℤ (((uint64_t)x20 * x29) +ℤ (((uint64_t)x22 * x30) +ℤ (((uint64_t)x24 * x28) +ℤ (((uint64_t)x26 * x26) +ℤ (((uint64_t)x28 * x24) +ℤ (((uint64_t)x30 * x22) +ℤ ((uint64_t)x29 * x20)))))))));
+{ ℤ x38 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x2 + x18))))))))) -ℤ (((uint64_t)x2 * x16) +ℤ (((uint64_t)x4 * x14) +ℤ (((uint64_t)x6 * x12) +ℤ (((uint64_t)x8 * x10) +ℤ (((uint64_t)x10 * x8) +ℤ (((uint64_t)x12 * x6) +ℤ (((uint64_t)x14 * x4) +ℤ ((uint64_t)x16 * x2))))))))), (((((uint64_t)x2 * x29) +ℤ (((uint64_t)x4 * x30) +ℤ (((uint64_t)x6 * x28) +ℤ (((uint64_t)x8 * x26) +ℤ (((uint64_t)x10 * x24) +ℤ (((uint64_t)x12 * x22) +ℤ (((uint64_t)x14 * x20) +ℤ ((uint64_t)x16 * x18)))))))) +ℤ (((uint64_t)x18 * x16) +ℤ (((uint64_t)x20 * x14) +ℤ (((uint64_t)x22 * x12) +ℤ (((uint64_t)x24 * x10) +ℤ (((uint64_t)x26 * x8) +ℤ (((uint64_t)x28 * x6) +ℤ (((uint64_t)x30 * x4) +ℤ ((uint64_t)x29 * x2))))))))) +ℤ (((uint64_t)x18 * x29) +ℤ (((uint64_t)x20 * x30) +ℤ (((uint64_t)x22 * x28) +ℤ (((uint64_t)x24 * x26) +ℤ (((uint64_t)x26 * x24) +ℤ (((uint64_t)x28 * x22) +ℤ (((uint64_t)x30 * x20) +ℤ ((uint64_t)x29 * x18))))))))));
+{ ℤ x39 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x14 + x30) *ℤ ((uint64_t)x2 + x18)))))))) -ℤ (((uint64_t)x2 * x14) +ℤ (((uint64_t)x4 * x12) +ℤ (((uint64_t)x6 * x10) +ℤ (((uint64_t)x8 * x8) +ℤ (((uint64_t)x10 * x6) +ℤ (((uint64_t)x12 * x4) +ℤ ((uint64_t)x14 * x2)))))))), (((((uint64_t)x2 * x30) +ℤ (((uint64_t)x4 * x28) +ℤ (((uint64_t)x6 * x26) +ℤ (((uint64_t)x8 * x24) +ℤ (((uint64_t)x10 * x22) +ℤ (((uint64_t)x12 * x20) +ℤ ((uint64_t)x14 * x18))))))) +ℤ (((uint64_t)x18 * x14) +ℤ (((uint64_t)x20 * x12) +ℤ (((uint64_t)x22 * x10) +ℤ (((uint64_t)x24 * x8) +ℤ (((uint64_t)x26 * x6) +ℤ (((uint64_t)x28 * x4) +ℤ ((uint64_t)x30 * x2)))))))) +ℤ (((uint64_t)x18 * x30) +ℤ (((uint64_t)x20 * x28) +ℤ (((uint64_t)x22 * x26) +ℤ (((uint64_t)x24 * x24) +ℤ (((uint64_t)x26 * x22) +ℤ (((uint64_t)x28 * x20) +ℤ ((uint64_t)x30 * x18)))))))));
+{ ℤ x40 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x12 + x28) *ℤ ((uint64_t)x2 + x18))))))) -ℤ (((uint64_t)x2 * x12) +ℤ (((uint64_t)x4 * x10) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ (((uint64_t)x10 * x4) +ℤ ((uint64_t)x12 * x2))))))), (((((uint64_t)x2 * x28) +ℤ (((uint64_t)x4 * x26) +ℤ (((uint64_t)x6 * x24) +ℤ (((uint64_t)x8 * x22) +ℤ (((uint64_t)x10 * x20) +ℤ ((uint64_t)x12 * x18)))))) +ℤ (((uint64_t)x18 * x12) +ℤ (((uint64_t)x20 * x10) +ℤ (((uint64_t)x22 * x8) +ℤ (((uint64_t)x24 * x6) +ℤ (((uint64_t)x26 * x4) +ℤ ((uint64_t)x28 * x2))))))) +ℤ (((uint64_t)x18 * x28) +ℤ (((uint64_t)x20 * x26) +ℤ (((uint64_t)x22 * x24) +ℤ (((uint64_t)x24 * x22) +ℤ (((uint64_t)x26 * x20) +ℤ ((uint64_t)x28 * x18))))))));
+{ ℤ x41 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x10 + x26) *ℤ ((uint64_t)x2 + x18)))))) -ℤ (((uint64_t)x2 * x10) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x10 * x2)))))), (((((uint64_t)x2 * x26) +ℤ (((uint64_t)x4 * x24) +ℤ (((uint64_t)x6 * x22) +ℤ (((uint64_t)x8 * x20) +ℤ ((uint64_t)x10 * x18))))) +ℤ (((uint64_t)x18 * x10) +ℤ (((uint64_t)x20 * x8) +ℤ (((uint64_t)x22 * x6) +ℤ (((uint64_t)x24 * x4) +ℤ ((uint64_t)x26 * x2)))))) +ℤ (((uint64_t)x18 * x26) +ℤ (((uint64_t)x20 * x24) +ℤ (((uint64_t)x22 * x22) +ℤ (((uint64_t)x24 * x20) +ℤ ((uint64_t)x26 * x18)))))));
+{ ℤ x42 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x8 + x24) *ℤ ((uint64_t)x2 + x18))))) -ℤ (((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2))))), (((((uint64_t)x2 * x24) +ℤ (((uint64_t)x4 * x22) +ℤ (((uint64_t)x6 * x20) +ℤ ((uint64_t)x8 * x18)))) +ℤ (((uint64_t)x18 * x8) +ℤ (((uint64_t)x20 * x6) +ℤ (((uint64_t)x22 * x4) +ℤ ((uint64_t)x24 * x2))))) +ℤ (((uint64_t)x18 * x24) +ℤ (((uint64_t)x20 * x22) +ℤ (((uint64_t)x22 * x20) +ℤ ((uint64_t)x24 * x18))))));
+{ ℤ x43 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x6 + x22) *ℤ ((uint64_t)x2 + x18)))) -ℤ (((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2)))), (((((uint64_t)x2 * x22) +ℤ (((uint64_t)x4 * x20) +ℤ ((uint64_t)x6 * x18))) +ℤ (((uint64_t)x18 * x6) +ℤ (((uint64_t)x20 * x4) +ℤ ((uint64_t)x22 * x2)))) +ℤ (((uint64_t)x18 * x22) +ℤ (((uint64_t)x20 * x20) +ℤ ((uint64_t)x22 * x18)))));
+{ ℤ x44 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x4 + x20) *ℤ ((uint64_t)x2 + x18))) -ℤ (((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2))), (((((uint64_t)x2 * x20) +ℤ ((uint64_t)x4 * x18)) +ℤ (((uint64_t)x18 * x4) +ℤ ((uint64_t)x20 * x2))) +ℤ (((uint64_t)x18 * x20) +ℤ ((uint64_t)x20 * x18))));
+{ ℤ x45 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x2 + x18) *ℤ ((uint64_t)x2 + x18)) -ℤ ((uint64_t)x2 * x2)), ((((uint64_t)x2 * x18) +ℤ ((uint64_t)x18 * x2)) +ℤ ((uint64_t)x18 * x18)));
+{ ℤ x46 = (((((uint64_t)x16 * x16) +ℤ ((uint64_t)x29 * x29)) +ℤ x39) +ℤ x31);
+{ ℤ x47 = ((((((uint64_t)x14 * x16) +ℤ ((uint64_t)x16 * x14)) +ℤ (((uint64_t)x30 * x29) +ℤ ((uint64_t)x29 * x30))) +ℤ x40) +ℤ x32);
+{ ℤ x48 = ((((((uint64_t)x12 * x16) +ℤ (((uint64_t)x14 * x14) +ℤ ((uint64_t)x16 * x12))) +ℤ (((uint64_t)x28 * x29) +ℤ (((uint64_t)x30 * x30) +ℤ ((uint64_t)x29 * x28)))) +ℤ x41) +ℤ x33);
+{ ℤ x49 = ((((((uint64_t)x10 * x16) +ℤ (((uint64_t)x12 * x14) +ℤ (((uint64_t)x14 * x12) +ℤ ((uint64_t)x16 * x10)))) +ℤ (((uint64_t)x26 * x29) +ℤ (((uint64_t)x28 * x30) +ℤ (((uint64_t)x30 * x28) +ℤ ((uint64_t)x29 * x26))))) +ℤ x42) +ℤ x34);
+{ ℤ x50 = ((((((uint64_t)x8 * x16) +ℤ (((uint64_t)x10 * x14) +ℤ (((uint64_t)x12 * x12) +ℤ (((uint64_t)x14 * x10) +ℤ ((uint64_t)x16 * x8))))) +ℤ (((uint64_t)x24 * x29) +ℤ (((uint64_t)x26 * x30) +ℤ (((uint64_t)x28 * x28) +ℤ (((uint64_t)x30 * x26) +ℤ ((uint64_t)x29 * x24)))))) +ℤ x43) +ℤ x35);
+{ ℤ x51 = ((((((uint64_t)x6 * x16) +ℤ (((uint64_t)x8 * x14) +ℤ (((uint64_t)x10 * x12) +ℤ (((uint64_t)x12 * x10) +ℤ (((uint64_t)x14 * x8) +ℤ ((uint64_t)x16 * x6)))))) +ℤ (((uint64_t)x22 * x29) +ℤ (((uint64_t)x24 * x30) +ℤ (((uint64_t)x26 * x28) +ℤ (((uint64_t)x28 * x26) +ℤ (((uint64_t)x30 * x24) +ℤ ((uint64_t)x29 * x22))))))) +ℤ x44) +ℤ x36);
+{ ℤ x52 = ((((((uint64_t)x4 * x16) +ℤ (((uint64_t)x6 * x14) +ℤ (((uint64_t)x8 * x12) +ℤ (((uint64_t)x10 * x10) +ℤ (((uint64_t)x12 * x8) +ℤ (((uint64_t)x14 * x6) +ℤ ((uint64_t)x16 * x4))))))) +ℤ (((uint64_t)x20 * x29) +ℤ (((uint64_t)x22 * x30) +ℤ (((uint64_t)x24 * x28) +ℤ (((uint64_t)x26 * x26) +ℤ (((uint64_t)x28 * x24) +ℤ (((uint64_t)x30 * x22) +ℤ ((uint64_t)x29 * x20)))))))) +ℤ x45) +ℤ x37);
+{ ℤ x53 = ((((uint64_t)x2 * x16) +ℤ (((uint64_t)x4 * x14) +ℤ (((uint64_t)x6 * x12) +ℤ (((uint64_t)x8 * x10) +ℤ (((uint64_t)x10 * x8) +ℤ (((uint64_t)x12 * x6) +ℤ (((uint64_t)x14 * x4) +ℤ ((uint64_t)x16 * x2)))))))) +ℤ (((uint64_t)x18 * x29) +ℤ (((uint64_t)x20 * x30) +ℤ (((uint64_t)x22 * x28) +ℤ (((uint64_t)x24 * x26) +ℤ (((uint64_t)x26 * x24) +ℤ (((uint64_t)x28 * x22) +ℤ (((uint64_t)x30 * x20) +ℤ ((uint64_t)x29 * x18)))))))));
+{ ℤ x54 = (((((uint64_t)x2 * x14) +ℤ (((uint64_t)x4 * x12) +ℤ (((uint64_t)x6 * x10) +ℤ (((uint64_t)x8 * x8) +ℤ (((uint64_t)x10 * x6) +ℤ (((uint64_t)x12 * x4) +ℤ ((uint64_t)x14 * x2))))))) +ℤ (((uint64_t)x18 * x30) +ℤ (((uint64_t)x20 * x28) +ℤ (((uint64_t)x22 * x26) +ℤ (((uint64_t)x24 * x24) +ℤ (((uint64_t)x26 * x22) +ℤ (((uint64_t)x28 * x20) +ℤ ((uint64_t)x30 * x18)))))))) +ℤ x31);
+{ ℤ x55 = (((((uint64_t)x2 * x12) +ℤ (((uint64_t)x4 * x10) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ (((uint64_t)x10 * x4) +ℤ ((uint64_t)x12 * x2)))))) +ℤ (((uint64_t)x18 * x28) +ℤ (((uint64_t)x20 * x26) +ℤ (((uint64_t)x22 * x24) +ℤ (((uint64_t)x24 * x22) +ℤ (((uint64_t)x26 * x20) +ℤ ((uint64_t)x28 * x18))))))) +ℤ x32);
+{ ℤ x56 = (((((uint64_t)x2 * x10) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x10 * x2))))) +ℤ (((uint64_t)x18 * x26) +ℤ (((uint64_t)x20 * x24) +ℤ (((uint64_t)x22 * x22) +ℤ (((uint64_t)x24 * x20) +ℤ ((uint64_t)x26 * x18)))))) +ℤ x33);
+{ ℤ x57 = (((((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2)))) +ℤ (((uint64_t)x18 * x24) +ℤ (((uint64_t)x20 * x22) +ℤ (((uint64_t)x22 * x20) +ℤ ((uint64_t)x24 * x18))))) +ℤ x34);
+{ ℤ x58 = (((((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2))) +ℤ (((uint64_t)x18 * x22) +ℤ (((uint64_t)x20 * x20) +ℤ ((uint64_t)x22 * x18)))) +ℤ x35);
+{ ℤ x59 = (((((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2)) +ℤ (((uint64_t)x18 * x20) +ℤ ((uint64_t)x20 * x18))) +ℤ x36);
+{ ℤ x60 = ((((uint64_t)x2 * x2) +ℤ ((uint64_t)x18 * x18)) +ℤ x37);
+{ uint64_t x61 = (x53 >> 0x1e);
+{ uint32_t x62 = (x53 & 0x3fffffff);
+{ uint64_t x63 = (x38 >> 0x1e);
+{ uint32_t x64 = (x38 & 0x3fffffff);
+{ ℤ x65 = ((0x40000000 *ℤ x63) +ℤ x64);
+{ uint64_t x66 = (x65 >> 0x1e);
+{ uint32_t x67 = (x65 & 0x3fffffff);
+{ ℤ x68 = ((x61 +ℤ x52) +ℤ x66);
+{ uint64_t x69 = (x68 >> 0x1e);
+{ uint32_t x70 = (x68 & 0x3fffffff);
+{ ℤ x71 = (x60 +ℤ x66);
+{ uint64_t x72 = (x71 >> 0x1e);
+{ uint32_t x73 = (x71 & 0x3fffffff);
+{ ℤ x74 = (x69 +ℤ x51);
+{ uint64_t x75 = (x74 >> 0x1e);
+{ uint32_t x76 = (x74 & 0x3fffffff);
+{ ℤ x77 = (x72 +ℤ x59);
+{ uint64_t x78 = (x77 >> 0x1e);
+{ uint32_t x79 = (x77 & 0x3fffffff);
+{ ℤ x80 = (x75 +ℤ x50);
+{ uint64_t x81 = (x80 >> 0x1e);
+{ uint32_t x82 = (x80 & 0x3fffffff);
+{ ℤ x83 = (x78 +ℤ x58);
+{ uint64_t x84 = (x83 >> 0x1e);
+{ uint32_t x85 = (x83 & 0x3fffffff);
+{ ℤ x86 = (x81 +ℤ x49);
+{ uint64_t x87 = (x86 >> 0x1e);
+{ uint32_t x88 = (x86 & 0x3fffffff);
+{ ℤ x89 = (x84 +ℤ x57);
+{ uint64_t x90 = (x89 >> 0x1e);
+{ uint32_t x91 = (x89 & 0x3fffffff);
+{ ℤ x92 = (x87 +ℤ x48);
+{ uint64_t x93 = (x92 >> 0x1e);
+{ uint32_t x94 = (x92 & 0x3fffffff);
+{ ℤ x95 = (x90 +ℤ x56);
+{ uint64_t x96 = (x95 >> 0x1e);
+{ uint32_t x97 = (x95 & 0x3fffffff);
+{ ℤ x98 = (x93 +ℤ x47);
+{ uint64_t x99 = (x98 >> 0x1e);
+{ uint32_t x100 = (x98 & 0x3fffffff);
+{ ℤ x101 = (x96 +ℤ x55);
+{ uint64_t x102 = (x101 >> 0x1e);
+{ uint32_t x103 = (x101 & 0x3fffffff);
+{ ℤ x104 = (x99 +ℤ x46);
+{ uint64_t x105 = (x104 >> 0x1e);
+{ uint32_t x106 = (x104 & 0x3fffffff);
+{ ℤ x107 = (x102 +ℤ x54);
+{ uint64_t x108 = (x107 >> 0x1e);
+{ uint32_t x109 = (x107 & 0x3fffffff);
+{ uint64_t x110 = (x105 + x67);
+{ uint32_t x111 = (uint32_t) (x110 >> 0x1e);
+{ uint32_t x112 = ((uint32_t)x110 & 0x3fffffff);
+{ uint64_t x113 = (x108 + x62);
+{ uint32_t x114 = (uint32_t) (x113 >> 0x1e);
+{ uint32_t x115 = ((uint32_t)x113 & 0x3fffffff);
+{ uint64_t x116 = (((uint64_t)0x40000000 * x111) + x112);
+{ uint32_t x117 = (uint32_t) (x116 >> 0x1e);
+{ uint32_t x118 = ((uint32_t)x116 & 0x3fffffff);
+{ uint32_t x119 = ((x114 + x70) + x117);
+{ uint32_t x120 = (x119 >> 0x1e);
+{ uint32_t x121 = (x119 & 0x3fffffff);
+{ uint32_t x122 = (x73 + x117);
+{ uint32_t x123 = (x122 >> 0x1e);
+{ uint32_t x124 = (x122 & 0x3fffffff);
+out[0] = x118;
+out[1] = x106;
+out[2] = x100;
+out[3] = x94;
+out[4] = x88;
+out[5] = x82;
+out[6] = x120 + x76;
+out[7] = x121;
+out[8] = x115;
+out[9] = x109;
+out[10] = x103;
+out[11] = x97;
+out[12] = x91;
+out[13] = x85;
+out[14] = x123 + x79;
+out[15] = x124;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas32_2e480m2e240m1/fesquare.h b/src/Specific/solinas32_2e480m2e240m1/fesquare.h
new file mode 100644
index 000000000..c86247b3d
--- /dev/null
+++ b/src/Specific/solinas32_2e480m2e240m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas32_2e480m2e240m1/fesquareDisplay.log b/src/Specific/solinas32_2e480m2e240m1/fesquareDisplay.log
new file mode 100644
index 000000000..8ac962fc4
--- /dev/null
+++ b/src/Specific/solinas32_2e480m2e240m1/fesquareDisplay.log
@@ -0,0 +1,101 @@
+λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ ℤ x31 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x16 + x29) *ℤ ((uint64_t)x16 + x29)) -ℤ ((uint64_t)x16 * x16)), ((((uint64_t)x16 * x29) +ℤ ((uint64_t)x29 * x16)) +ℤ ((uint64_t)x29 * x29)));
+ ℤ x32 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x14 + x30) *ℤ ((uint64_t)x16 + x29)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x14 + x30))) -ℤ (((uint64_t)x14 * x16) +ℤ ((uint64_t)x16 * x14))), (((((uint64_t)x14 * x29) +ℤ ((uint64_t)x16 * x30)) +ℤ (((uint64_t)x30 * x16) +ℤ ((uint64_t)x29 * x14))) +ℤ (((uint64_t)x30 * x29) +ℤ ((uint64_t)x29 * x30))));
+ ℤ x33 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x12 + x28) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x14 + x30)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x12 + x28)))) -ℤ (((uint64_t)x12 * x16) +ℤ (((uint64_t)x14 * x14) +ℤ ((uint64_t)x16 * x12)))), (((((uint64_t)x12 * x29) +ℤ (((uint64_t)x14 * x30) +ℤ ((uint64_t)x16 * x28))) +ℤ (((uint64_t)x28 * x16) +ℤ (((uint64_t)x30 * x14) +ℤ ((uint64_t)x29 * x12)))) +ℤ (((uint64_t)x28 * x29) +ℤ (((uint64_t)x30 * x30) +ℤ ((uint64_t)x29 * x28)))));
+ ℤ x34 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x10 + x26) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x12 + x28)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x10 + x26))))) -ℤ (((uint64_t)x10 * x16) +ℤ (((uint64_t)x12 * x14) +ℤ (((uint64_t)x14 * x12) +ℤ ((uint64_t)x16 * x10))))), (((((uint64_t)x10 * x29) +ℤ (((uint64_t)x12 * x30) +ℤ (((uint64_t)x14 * x28) +ℤ ((uint64_t)x16 * x26)))) +ℤ (((uint64_t)x26 * x16) +ℤ (((uint64_t)x28 * x14) +ℤ (((uint64_t)x30 * x12) +ℤ ((uint64_t)x29 * x10))))) +ℤ (((uint64_t)x26 * x29) +ℤ (((uint64_t)x28 * x30) +ℤ (((uint64_t)x30 * x28) +ℤ ((uint64_t)x29 * x26))))));
+ ℤ x35 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x8 + x24) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x10 + x26)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x8 + x24)))))) -ℤ (((uint64_t)x8 * x16) +ℤ (((uint64_t)x10 * x14) +ℤ (((uint64_t)x12 * x12) +ℤ (((uint64_t)x14 * x10) +ℤ ((uint64_t)x16 * x8)))))), (((((uint64_t)x8 * x29) +ℤ (((uint64_t)x10 * x30) +ℤ (((uint64_t)x12 * x28) +ℤ (((uint64_t)x14 * x26) +ℤ ((uint64_t)x16 * x24))))) +ℤ (((uint64_t)x24 * x16) +ℤ (((uint64_t)x26 * x14) +ℤ (((uint64_t)x28 * x12) +ℤ (((uint64_t)x30 * x10) +ℤ ((uint64_t)x29 * x8)))))) +ℤ (((uint64_t)x24 * x29) +ℤ (((uint64_t)x26 * x30) +ℤ (((uint64_t)x28 * x28) +ℤ (((uint64_t)x30 * x26) +ℤ ((uint64_t)x29 * x24)))))));
+ ℤ x36 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x6 + x22) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x8 + x24)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x6 + x22))))))) -ℤ (((uint64_t)x6 * x16) +ℤ (((uint64_t)x8 * x14) +ℤ (((uint64_t)x10 * x12) +ℤ (((uint64_t)x12 * x10) +ℤ (((uint64_t)x14 * x8) +ℤ ((uint64_t)x16 * x6))))))), (((((uint64_t)x6 * x29) +ℤ (((uint64_t)x8 * x30) +ℤ (((uint64_t)x10 * x28) +ℤ (((uint64_t)x12 * x26) +ℤ (((uint64_t)x14 * x24) +ℤ ((uint64_t)x16 * x22)))))) +ℤ (((uint64_t)x22 * x16) +ℤ (((uint64_t)x24 * x14) +ℤ (((uint64_t)x26 * x12) +ℤ (((uint64_t)x28 * x10) +ℤ (((uint64_t)x30 * x8) +ℤ ((uint64_t)x29 * x6))))))) +ℤ (((uint64_t)x22 * x29) +ℤ (((uint64_t)x24 * x30) +ℤ (((uint64_t)x26 * x28) +ℤ (((uint64_t)x28 * x26) +ℤ (((uint64_t)x30 * x24) +ℤ ((uint64_t)x29 * x22))))))));
+ ℤ x37 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x4 + x20) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x6 + x22)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x4 + x20)))))))) -ℤ (((uint64_t)x4 * x16) +ℤ (((uint64_t)x6 * x14) +ℤ (((uint64_t)x8 * x12) +ℤ (((uint64_t)x10 * x10) +ℤ (((uint64_t)x12 * x8) +ℤ (((uint64_t)x14 * x6) +ℤ ((uint64_t)x16 * x4)))))))), (((((uint64_t)x4 * x29) +ℤ (((uint64_t)x6 * x30) +ℤ (((uint64_t)x8 * x28) +ℤ (((uint64_t)x10 * x26) +ℤ (((uint64_t)x12 * x24) +ℤ (((uint64_t)x14 * x22) +ℤ ((uint64_t)x16 * x20))))))) +ℤ (((uint64_t)x20 * x16) +ℤ (((uint64_t)x22 * x14) +ℤ (((uint64_t)x24 * x12) +ℤ (((uint64_t)x26 * x10) +ℤ (((uint64_t)x28 * x8) +ℤ (((uint64_t)x30 * x6) +ℤ ((uint64_t)x29 * x4)))))))) +ℤ (((uint64_t)x20 * x29) +ℤ (((uint64_t)x22 * x30) +ℤ (((uint64_t)x24 * x28) +ℤ (((uint64_t)x26 * x26) +ℤ (((uint64_t)x28 * x24) +ℤ (((uint64_t)x30 * x22) +ℤ ((uint64_t)x29 * x20)))))))));
+ ℤ x38 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x16 + x29)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x14 + x30) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x16 + x29) *ℤ ((uint64_t)x2 + x18))))))))) -ℤ (((uint64_t)x2 * x16) +ℤ (((uint64_t)x4 * x14) +ℤ (((uint64_t)x6 * x12) +ℤ (((uint64_t)x8 * x10) +ℤ (((uint64_t)x10 * x8) +ℤ (((uint64_t)x12 * x6) +ℤ (((uint64_t)x14 * x4) +ℤ ((uint64_t)x16 * x2))))))))), (((((uint64_t)x2 * x29) +ℤ (((uint64_t)x4 * x30) +ℤ (((uint64_t)x6 * x28) +ℤ (((uint64_t)x8 * x26) +ℤ (((uint64_t)x10 * x24) +ℤ (((uint64_t)x12 * x22) +ℤ (((uint64_t)x14 * x20) +ℤ ((uint64_t)x16 * x18)))))))) +ℤ (((uint64_t)x18 * x16) +ℤ (((uint64_t)x20 * x14) +ℤ (((uint64_t)x22 * x12) +ℤ (((uint64_t)x24 * x10) +ℤ (((uint64_t)x26 * x8) +ℤ (((uint64_t)x28 * x6) +ℤ (((uint64_t)x30 * x4) +ℤ ((uint64_t)x29 * x2))))))))) +ℤ (((uint64_t)x18 * x29) +ℤ (((uint64_t)x20 * x30) +ℤ (((uint64_t)x22 * x28) +ℤ (((uint64_t)x24 * x26) +ℤ (((uint64_t)x26 * x24) +ℤ (((uint64_t)x28 * x22) +ℤ (((uint64_t)x30 * x20) +ℤ ((uint64_t)x29 * x18))))))))));
+ ℤ x39 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x14 + x30)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x12 + x28) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x14 + x30) *ℤ ((uint64_t)x2 + x18)))))))) -ℤ (((uint64_t)x2 * x14) +ℤ (((uint64_t)x4 * x12) +ℤ (((uint64_t)x6 * x10) +ℤ (((uint64_t)x8 * x8) +ℤ (((uint64_t)x10 * x6) +ℤ (((uint64_t)x12 * x4) +ℤ ((uint64_t)x14 * x2)))))))), (((((uint64_t)x2 * x30) +ℤ (((uint64_t)x4 * x28) +ℤ (((uint64_t)x6 * x26) +ℤ (((uint64_t)x8 * x24) +ℤ (((uint64_t)x10 * x22) +ℤ (((uint64_t)x12 * x20) +ℤ ((uint64_t)x14 * x18))))))) +ℤ (((uint64_t)x18 * x14) +ℤ (((uint64_t)x20 * x12) +ℤ (((uint64_t)x22 * x10) +ℤ (((uint64_t)x24 * x8) +ℤ (((uint64_t)x26 * x6) +ℤ (((uint64_t)x28 * x4) +ℤ ((uint64_t)x30 * x2)))))))) +ℤ (((uint64_t)x18 * x30) +ℤ (((uint64_t)x20 * x28) +ℤ (((uint64_t)x22 * x26) +ℤ (((uint64_t)x24 * x24) +ℤ (((uint64_t)x26 * x22) +ℤ (((uint64_t)x28 * x20) +ℤ ((uint64_t)x30 * x18)))))))));
+ ℤ x40 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x12 + x28)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x10 + x26) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x12 + x28) *ℤ ((uint64_t)x2 + x18))))))) -ℤ (((uint64_t)x2 * x12) +ℤ (((uint64_t)x4 * x10) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ (((uint64_t)x10 * x4) +ℤ ((uint64_t)x12 * x2))))))), (((((uint64_t)x2 * x28) +ℤ (((uint64_t)x4 * x26) +ℤ (((uint64_t)x6 * x24) +ℤ (((uint64_t)x8 * x22) +ℤ (((uint64_t)x10 * x20) +ℤ ((uint64_t)x12 * x18)))))) +ℤ (((uint64_t)x18 * x12) +ℤ (((uint64_t)x20 * x10) +ℤ (((uint64_t)x22 * x8) +ℤ (((uint64_t)x24 * x6) +ℤ (((uint64_t)x26 * x4) +ℤ ((uint64_t)x28 * x2))))))) +ℤ (((uint64_t)x18 * x28) +ℤ (((uint64_t)x20 * x26) +ℤ (((uint64_t)x22 * x24) +ℤ (((uint64_t)x24 * x22) +ℤ (((uint64_t)x26 * x20) +ℤ ((uint64_t)x28 * x18))))))));
+ ℤ x41 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x10 + x26)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x8 + x24) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x10 + x26) *ℤ ((uint64_t)x2 + x18)))))) -ℤ (((uint64_t)x2 * x10) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x10 * x2)))))), (((((uint64_t)x2 * x26) +ℤ (((uint64_t)x4 * x24) +ℤ (((uint64_t)x6 * x22) +ℤ (((uint64_t)x8 * x20) +ℤ ((uint64_t)x10 * x18))))) +ℤ (((uint64_t)x18 * x10) +ℤ (((uint64_t)x20 * x8) +ℤ (((uint64_t)x22 * x6) +ℤ (((uint64_t)x24 * x4) +ℤ ((uint64_t)x26 * x2)))))) +ℤ (((uint64_t)x18 * x26) +ℤ (((uint64_t)x20 * x24) +ℤ (((uint64_t)x22 * x22) +ℤ (((uint64_t)x24 * x20) +ℤ ((uint64_t)x26 * x18)))))));
+ ℤ x42 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x8 + x24)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x6 + x22) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x8 + x24) *ℤ ((uint64_t)x2 + x18))))) -ℤ (((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2))))), (((((uint64_t)x2 * x24) +ℤ (((uint64_t)x4 * x22) +ℤ (((uint64_t)x6 * x20) +ℤ ((uint64_t)x8 * x18)))) +ℤ (((uint64_t)x18 * x8) +ℤ (((uint64_t)x20 * x6) +ℤ (((uint64_t)x22 * x4) +ℤ ((uint64_t)x24 * x2))))) +ℤ (((uint64_t)x18 * x24) +ℤ (((uint64_t)x20 * x22) +ℤ (((uint64_t)x22 * x20) +ℤ ((uint64_t)x24 * x18))))));
+ ℤ x43 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x6 + x22)) +ℤ ((((uint64_t)x4 + x20) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x6 + x22) *ℤ ((uint64_t)x2 + x18)))) -ℤ (((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2)))), (((((uint64_t)x2 * x22) +ℤ (((uint64_t)x4 * x20) +ℤ ((uint64_t)x6 * x18))) +ℤ (((uint64_t)x18 * x6) +ℤ (((uint64_t)x20 * x4) +ℤ ((uint64_t)x22 * x2)))) +ℤ (((uint64_t)x18 * x22) +ℤ (((uint64_t)x20 * x20) +ℤ ((uint64_t)x22 * x18)))));
+ ℤ x44 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) ((((((uint64_t)x2 + x18) *ℤ ((uint64_t)x4 + x20)) +ℤ (((uint64_t)x4 + x20) *ℤ ((uint64_t)x2 + x18))) -ℤ (((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2))), (((((uint64_t)x2 * x20) +ℤ ((uint64_t)x4 * x18)) +ℤ (((uint64_t)x18 * x4) +ℤ ((uint64_t)x20 * x2))) +ℤ (((uint64_t)x18 * x20) +ℤ ((uint64_t)x20 * x18))));
+ ℤ x45 = Op (Syntax.IdWithAlt Syntax.TZ Syntax.TZ Syntax.TZ) (((((uint64_t)x2 + x18) *ℤ ((uint64_t)x2 + x18)) -ℤ ((uint64_t)x2 * x2)), ((((uint64_t)x2 * x18) +ℤ ((uint64_t)x18 * x2)) +ℤ ((uint64_t)x18 * x18)));
+ ℤ x46 = (((((uint64_t)x16 * x16) +ℤ ((uint64_t)x29 * x29)) +ℤ x39) +ℤ x31);
+ ℤ x47 = ((((((uint64_t)x14 * x16) +ℤ ((uint64_t)x16 * x14)) +ℤ (((uint64_t)x30 * x29) +ℤ ((uint64_t)x29 * x30))) +ℤ x40) +ℤ x32);
+ ℤ x48 = ((((((uint64_t)x12 * x16) +ℤ (((uint64_t)x14 * x14) +ℤ ((uint64_t)x16 * x12))) +ℤ (((uint64_t)x28 * x29) +ℤ (((uint64_t)x30 * x30) +ℤ ((uint64_t)x29 * x28)))) +ℤ x41) +ℤ x33);
+ ℤ x49 = ((((((uint64_t)x10 * x16) +ℤ (((uint64_t)x12 * x14) +ℤ (((uint64_t)x14 * x12) +ℤ ((uint64_t)x16 * x10)))) +ℤ (((uint64_t)x26 * x29) +ℤ (((uint64_t)x28 * x30) +ℤ (((uint64_t)x30 * x28) +ℤ ((uint64_t)x29 * x26))))) +ℤ x42) +ℤ x34);
+ ℤ x50 = ((((((uint64_t)x8 * x16) +ℤ (((uint64_t)x10 * x14) +ℤ (((uint64_t)x12 * x12) +ℤ (((uint64_t)x14 * x10) +ℤ ((uint64_t)x16 * x8))))) +ℤ (((uint64_t)x24 * x29) +ℤ (((uint64_t)x26 * x30) +ℤ (((uint64_t)x28 * x28) +ℤ (((uint64_t)x30 * x26) +ℤ ((uint64_t)x29 * x24)))))) +ℤ x43) +ℤ x35);
+ ℤ x51 = ((((((uint64_t)x6 * x16) +ℤ (((uint64_t)x8 * x14) +ℤ (((uint64_t)x10 * x12) +ℤ (((uint64_t)x12 * x10) +ℤ (((uint64_t)x14 * x8) +ℤ ((uint64_t)x16 * x6)))))) +ℤ (((uint64_t)x22 * x29) +ℤ (((uint64_t)x24 * x30) +ℤ (((uint64_t)x26 * x28) +ℤ (((uint64_t)x28 * x26) +ℤ (((uint64_t)x30 * x24) +ℤ ((uint64_t)x29 * x22))))))) +ℤ x44) +ℤ x36);
+ ℤ x52 = ((((((uint64_t)x4 * x16) +ℤ (((uint64_t)x6 * x14) +ℤ (((uint64_t)x8 * x12) +ℤ (((uint64_t)x10 * x10) +ℤ (((uint64_t)x12 * x8) +ℤ (((uint64_t)x14 * x6) +ℤ ((uint64_t)x16 * x4))))))) +ℤ (((uint64_t)x20 * x29) +ℤ (((uint64_t)x22 * x30) +ℤ (((uint64_t)x24 * x28) +ℤ (((uint64_t)x26 * x26) +ℤ (((uint64_t)x28 * x24) +ℤ (((uint64_t)x30 * x22) +ℤ ((uint64_t)x29 * x20)))))))) +ℤ x45) +ℤ x37);
+ ℤ x53 = ((((uint64_t)x2 * x16) +ℤ (((uint64_t)x4 * x14) +ℤ (((uint64_t)x6 * x12) +ℤ (((uint64_t)x8 * x10) +ℤ (((uint64_t)x10 * x8) +ℤ (((uint64_t)x12 * x6) +ℤ (((uint64_t)x14 * x4) +ℤ ((uint64_t)x16 * x2)))))))) +ℤ (((uint64_t)x18 * x29) +ℤ (((uint64_t)x20 * x30) +ℤ (((uint64_t)x22 * x28) +ℤ (((uint64_t)x24 * x26) +ℤ (((uint64_t)x26 * x24) +ℤ (((uint64_t)x28 * x22) +ℤ (((uint64_t)x30 * x20) +ℤ ((uint64_t)x29 * x18)))))))));
+ ℤ x54 = (((((uint64_t)x2 * x14) +ℤ (((uint64_t)x4 * x12) +ℤ (((uint64_t)x6 * x10) +ℤ (((uint64_t)x8 * x8) +ℤ (((uint64_t)x10 * x6) +ℤ (((uint64_t)x12 * x4) +ℤ ((uint64_t)x14 * x2))))))) +ℤ (((uint64_t)x18 * x30) +ℤ (((uint64_t)x20 * x28) +ℤ (((uint64_t)x22 * x26) +ℤ (((uint64_t)x24 * x24) +ℤ (((uint64_t)x26 * x22) +ℤ (((uint64_t)x28 * x20) +ℤ ((uint64_t)x30 * x18)))))))) +ℤ x31);
+ ℤ x55 = (((((uint64_t)x2 * x12) +ℤ (((uint64_t)x4 * x10) +ℤ (((uint64_t)x6 * x8) +ℤ (((uint64_t)x8 * x6) +ℤ (((uint64_t)x10 * x4) +ℤ ((uint64_t)x12 * x2)))))) +ℤ (((uint64_t)x18 * x28) +ℤ (((uint64_t)x20 * x26) +ℤ (((uint64_t)x22 * x24) +ℤ (((uint64_t)x24 * x22) +ℤ (((uint64_t)x26 * x20) +ℤ ((uint64_t)x28 * x18))))))) +ℤ x32);
+ ℤ x56 = (((((uint64_t)x2 * x10) +ℤ (((uint64_t)x4 * x8) +ℤ (((uint64_t)x6 * x6) +ℤ (((uint64_t)x8 * x4) +ℤ ((uint64_t)x10 * x2))))) +ℤ (((uint64_t)x18 * x26) +ℤ (((uint64_t)x20 * x24) +ℤ (((uint64_t)x22 * x22) +ℤ (((uint64_t)x24 * x20) +ℤ ((uint64_t)x26 * x18)))))) +ℤ x33);
+ ℤ x57 = (((((uint64_t)x2 * x8) +ℤ (((uint64_t)x4 * x6) +ℤ (((uint64_t)x6 * x4) +ℤ ((uint64_t)x8 * x2)))) +ℤ (((uint64_t)x18 * x24) +ℤ (((uint64_t)x20 * x22) +ℤ (((uint64_t)x22 * x20) +ℤ ((uint64_t)x24 * x18))))) +ℤ x34);
+ ℤ x58 = (((((uint64_t)x2 * x6) +ℤ (((uint64_t)x4 * x4) +ℤ ((uint64_t)x6 * x2))) +ℤ (((uint64_t)x18 * x22) +ℤ (((uint64_t)x20 * x20) +ℤ ((uint64_t)x22 * x18)))) +ℤ x35);
+ ℤ x59 = (((((uint64_t)x2 * x4) +ℤ ((uint64_t)x4 * x2)) +ℤ (((uint64_t)x18 * x20) +ℤ ((uint64_t)x20 * x18))) +ℤ x36);
+ ℤ x60 = ((((uint64_t)x2 * x2) +ℤ ((uint64_t)x18 * x18)) +ℤ x37);
+ uint64_t x61 = (x53 >> 0x1e);
+ uint32_t x62 = (x53 & 0x3fffffff);
+ uint64_t x63 = (x38 >> 0x1e);
+ uint32_t x64 = (x38 & 0x3fffffff);
+ ℤ x65 = ((0x40000000 *ℤ x63) +ℤ x64);
+ uint64_t x66 = (x65 >> 0x1e);
+ uint32_t x67 = (x65 & 0x3fffffff);
+ ℤ x68 = ((x61 +ℤ x52) +ℤ x66);
+ uint64_t x69 = (x68 >> 0x1e);
+ uint32_t x70 = (x68 & 0x3fffffff);
+ ℤ x71 = (x60 +ℤ x66);
+ uint64_t x72 = (x71 >> 0x1e);
+ uint32_t x73 = (x71 & 0x3fffffff);
+ ℤ x74 = (x69 +ℤ x51);
+ uint64_t x75 = (x74 >> 0x1e);
+ uint32_t x76 = (x74 & 0x3fffffff);
+ ℤ x77 = (x72 +ℤ x59);
+ uint64_t x78 = (x77 >> 0x1e);
+ uint32_t x79 = (x77 & 0x3fffffff);
+ ℤ x80 = (x75 +ℤ x50);
+ uint64_t x81 = (x80 >> 0x1e);
+ uint32_t x82 = (x80 & 0x3fffffff);
+ ℤ x83 = (x78 +ℤ x58);
+ uint64_t x84 = (x83 >> 0x1e);
+ uint32_t x85 = (x83 & 0x3fffffff);
+ ℤ x86 = (x81 +ℤ x49);
+ uint64_t x87 = (x86 >> 0x1e);
+ uint32_t x88 = (x86 & 0x3fffffff);
+ ℤ x89 = (x84 +ℤ x57);
+ uint64_t x90 = (x89 >> 0x1e);
+ uint32_t x91 = (x89 & 0x3fffffff);
+ ℤ x92 = (x87 +ℤ x48);
+ uint64_t x93 = (x92 >> 0x1e);
+ uint32_t x94 = (x92 & 0x3fffffff);
+ ℤ x95 = (x90 +ℤ x56);
+ uint64_t x96 = (x95 >> 0x1e);
+ uint32_t x97 = (x95 & 0x3fffffff);
+ ℤ x98 = (x93 +ℤ x47);
+ uint64_t x99 = (x98 >> 0x1e);
+ uint32_t x100 = (x98 & 0x3fffffff);
+ ℤ x101 = (x96 +ℤ x55);
+ uint64_t x102 = (x101 >> 0x1e);
+ uint32_t x103 = (x101 & 0x3fffffff);
+ ℤ x104 = (x99 +ℤ x46);
+ uint64_t x105 = (x104 >> 0x1e);
+ uint32_t x106 = (x104 & 0x3fffffff);
+ ℤ x107 = (x102 +ℤ x54);
+ uint64_t x108 = (x107 >> 0x1e);
+ uint32_t x109 = (x107 & 0x3fffffff);
+ uint64_t x110 = (x105 + x67);
+ uint32_t x111 = (uint32_t) (x110 >> 0x1e);
+ uint32_t x112 = ((uint32_t)x110 & 0x3fffffff);
+ uint64_t x113 = (x108 + x62);
+ uint32_t x114 = (uint32_t) (x113 >> 0x1e);
+ uint32_t x115 = ((uint32_t)x113 & 0x3fffffff);
+ uint64_t x116 = (((uint64_t)0x40000000 * x111) + x112);
+ uint32_t x117 = (uint32_t) (x116 >> 0x1e);
+ uint32_t x118 = ((uint32_t)x116 & 0x3fffffff);
+ uint32_t x119 = ((x114 + x70) + x117);
+ uint32_t x120 = (x119 >> 0x1e);
+ uint32_t x121 = (x119 & 0x3fffffff);
+ uint32_t x122 = (x73 + x117);
+ uint32_t x123 = (x122 >> 0x1e);
+ uint32_t x124 = (x122 & 0x3fffffff);
+ return (Return x118, Return x106, Return x100, Return x94, Return x88, Return x82, (x120 + x76), Return x121, Return x115, Return x109, Return x103, Return x97, Return x91, Return x85, (x123 + x79), Return x124))
+x
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/solinas32_2e480m2e240m1/freeze.c b/src/Specific/solinas32_2e480m2e240m1/freeze.c
new file mode 100644
index 000000000..6d0580ee4
--- /dev/null
+++ b/src/Specific/solinas32_2e480m2e240m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // FIXME(review): the body below is not valid C (no opening '{', declarations used as expressions); it looks like unrendered generator output - regenerate rather than hand-edit.
+out[0] = uint32_t x32; // a declaration cannot be the right-hand side of an assignment
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 30 Syntax.TWord 3 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 5 Syntax.TWord 3 0x0; // raw 'Op Syntax.SubWithGetBorrow ...' text appears where a C expression is required
+out[2] = x2; // stores the last parameter unchanged
+out[3] = 0x3fffffff;; // stores a constant; the second ';' is an empty statement
+} // no matching '{' precedes this brace
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas32_2e480m2e240m1/freeze.h b/src/Specific/solinas32_2e480m2e240m1/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas32_2e480m2e240m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: output pointer plus sixteen uint64_t value arguments. NOTE(review): force_inline is __attribute__((always_inline)) without the `inline` keyword and only a prototype is visible here - confirm how callers are expected to get the body inlined.
diff --git a/src/Specific/solinas32_2e480m2e240m1/freezeDisplay.log b/src/Specific/solinas32_2e480m2e240m1/freezeDisplay.log
index 09bf4d375..b3f3cf7e7 100644
--- a/src/Specific/solinas32_2e480m2e240m1/freezeDisplay.log
+++ b/src/Specific/solinas32_2e480m2e240m1/freezeDisplay.log
@@ -19,37 +19,37 @@ Interp-η
uint32_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x72, Return x30, 0x3fffffff);
uint32_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x75, Return x29, 0x3fffffff);
uint32_t x79 = (uint32_t)cmovznz(x78, 0x0, 0xffffffff);
- uint32_t x80 = x79 & 0x3fffffff;
+ uint32_t x80 = (x79 & 0x3fffffff);
uint32_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
- uint32_t x84 = x79 & 0x3fffffff;
+ uint32_t x84 = (x79 & 0x3fffffff);
uint32_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
- uint32_t x88 = x79 & 0x3fffffff;
+ uint32_t x88 = (x79 & 0x3fffffff);
uint32_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
- uint32_t x92 = x79 & 0x3fffffff;
+ uint32_t x92 = (x79 & 0x3fffffff);
uint32_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
- uint32_t x96 = x79 & 0x3fffffff;
+ uint32_t x96 = (x79 & 0x3fffffff);
uint32_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
- uint32_t x100 = x79 & 0x3fffffff;
+ uint32_t x100 = (x79 & 0x3fffffff);
uint32_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
- uint32_t x104 = x79 & 0x3fffffff;
+ uint32_t x104 = (x79 & 0x3fffffff);
uint32_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
- uint32_t x108 = x79 & 0x3fffffff;
+ uint32_t x108 = (x79 & 0x3fffffff);
uint32_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
- uint32_t x112 = x79 & 0x3ffffffe;
+ uint32_t x112 = (x79 & 0x3ffffffe);
uint32_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
- uint32_t x116 = x79 & 0x3fffffff;
+ uint32_t x116 = (x79 & 0x3fffffff);
uint32_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
- uint32_t x120 = x79 & 0x3fffffff;
+ uint32_t x120 = (x79 & 0x3fffffff);
uint32_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
- uint32_t x124 = x79 & 0x3fffffff;
+ uint32_t x124 = (x79 & 0x3fffffff);
uint32_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
- uint32_t x128 = x79 & 0x3fffffff;
+ uint32_t x128 = (x79 & 0x3fffffff);
uint32_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
- uint32_t x132 = x79 & 0x3fffffff;
+ uint32_t x132 = (x79 & 0x3fffffff);
uint32_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
- uint32_t x136 = x79 & 0x3fffffff;
+ uint32_t x136 = (x79 & 0x3fffffff);
uint32_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
- uint32_t x140 = x79 & 0x3fffffff;
+ uint32_t x140 = (x79 & 0x3fffffff);
uint32_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 5) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
(Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
x
diff --git a/src/Specific/solinas64_2e127m1/femul.c b/src/Specific/solinas64_2e127m1/femul.c
new file mode 100644
index 000000000..e38dc1016
--- /dev/null
+++ b/src/Specific/solinas64_2e127m1/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Writes three 64-bit words to out[0..2], computed from (x6, x7, x5) and (x10, x11, x9). NOTE(review): the path solinas64_2e127m1 and the 43/42/42-bit masks suggest multiplication modulo 2^127 - 1 - confirm against the generator.
+{ uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9))); // 128-bit sum x5*x10 + 2*x7*x11 + x6*x9
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + ((uint128_t)x6 * x10)); // 128-bit sum x5*x11 + x7*x9 + x6*x10
+{ uint128_t x14 = (((uint128_t)x5 * x9) + ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11)))); // 128-bit sum x5*x9 + 2*x7*x10 + 2*x6*x11
+{ uint64_t x15 = (uint64_t) (x14 >> 0x2b); // x14 shifted right by 43 bits, cast to 64 bits
+{ uint64_t x16 = ((uint64_t)x14 & 0x7ffffffffff); // low 43 bits of x14
+{ uint128_t x17 = (x15 + x13); // carry from x14 added into x13
+{ uint64_t x18 = (uint64_t) (x17 >> 0x2a); // x17 shifted right by 42 bits, cast to 64 bits
+{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffff); // low 42 bits of x17
+{ uint128_t x20 = (x18 + x12); // carry from x17 added into x12
+{ uint64_t x21 = (uint64_t) (x20 >> 0x2a); // x20 shifted right by 42 bits, cast to 64 bits
+{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffff); // low 42 bits of x20
+{ uint64_t x23 = (x16 + x21); // carry out of x20 is added back onto the low word x16
+{ uint64_t x24 = (x23 >> 0x2b); // bits of x23 above the low 43
+{ uint64_t x25 = (x23 & 0x7ffffffffff); // low 43 bits of x23
+{ uint64_t x26 = (x24 + x19); // second-pass carry into the middle word
+{ uint64_t x27 = (x26 >> 0x2a); // bits of x26 above the low 42
+{ uint64_t x28 = (x26 & 0x3ffffffffff); // low 42 bits of x26
+out[0] = x27 + x22; // sum is stored without a further mask or carry
+out[1] = x28;
+out[2] = x25;
+}}}}}}}}}}}}}}}}} // closes every scope opened by the '{ ...' lines above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e127m1/femul.h b/src/Specific/solinas64_2e127m1/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e127m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Prototype: output pointer plus six uint64_t value arguments. NOTE(review): force_inline is __attribute__((always_inline)) without the `inline` keyword and only a prototype is visible here - confirm how callers are expected to get the body inlined.
diff --git a/src/Specific/solinas64_2e127m1/fesquare.c b/src/Specific/solinas64_2e127m1/fesquare.c
new file mode 100644
index 000000000..80e046893
--- /dev/null
+++ b/src/Specific/solinas64_2e127m1/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Writes three 64-bit words to out[0..2], computed from products of (x3, x4, x2) with itself. NOTE(review): the path solinas64_2e127m1 and the 43/42/42-bit masks suggest squaring modulo 2^127 - 1 - confirm against the generator.
+{ uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2))); // 128-bit sum x2*x3 + 2*x4*x4 + x3*x2
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((uint128_t)x3 * x3)); // 128-bit sum x2*x4 + x4*x2 + x3*x3
+{ uint128_t x7 = (((uint128_t)x2 * x2) + ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4)))); // 128-bit sum x2*x2 + 2*x4*x3 + 2*x3*x4
+{ uint64_t x8 = (uint64_t) (x7 >> 0x2b); // x7 shifted right by 43 bits, cast to 64 bits
+{ uint64_t x9 = ((uint64_t)x7 & 0x7ffffffffff); // low 43 bits of x7
+{ uint128_t x10 = (x8 + x6); // carry from x7 added into x6
+{ uint64_t x11 = (uint64_t) (x10 >> 0x2a); // x10 shifted right by 42 bits, cast to 64 bits
+{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffff); // low 42 bits of x10
+{ uint128_t x13 = (x11 + x5); // carry from x10 added into x5
+{ uint64_t x14 = (uint64_t) (x13 >> 0x2a); // x13 shifted right by 42 bits, cast to 64 bits
+{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffff); // low 42 bits of x13
+{ uint64_t x16 = (x9 + x14); // carry out of x13 is added back onto the low word x9
+{ uint64_t x17 = (x16 >> 0x2b); // bits of x16 above the low 43
+{ uint64_t x18 = (x16 & 0x7ffffffffff); // low 43 bits of x16
+{ uint64_t x19 = (x17 + x12); // second-pass carry into the middle word
+{ uint64_t x20 = (x19 >> 0x2a); // bits of x19 above the low 42
+{ uint64_t x21 = (x19 & 0x3ffffffffff); // low 42 bits of x19
+out[0] = x20 + x15; // sum is stored without a further mask or carry
+out[1] = x21;
+out[2] = x18;
+}}}}}}}}}}}}}}}}} // closes every scope opened by the '{ ...' lines above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e127m1/fesquare.h b/src/Specific/solinas64_2e127m1/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e127m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype: output pointer plus three uint64_t value arguments. NOTE(review): force_inline is __attribute__((always_inline)) without the `inline` keyword and only a prototype is visible here - confirm how callers are expected to get the body inlined.
diff --git a/src/Specific/solinas64_2e127m1/freeze.c b/src/Specific/solinas64_2e127m1/freeze.c
new file mode 100644
index 000000000..ae530ce17
--- /dev/null
+++ b/src/Specific/solinas64_2e127m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // FIXME(review): the body below is not valid C (no opening '{', declarations used as expressions); it looks like unrendered generator output - regenerate rather than hand-edit.
+out[0] = uint64_t x6; // a declaration cannot be the right-hand side of an assignment
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 43 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw 'Op Syntax.SubWithGetBorrow ...' text appears where a C expression is required
+out[2] = x2; // stores the last parameter unchanged
+out[3] = 0x7ffffffffff;; // stores a constant; the second ';' is an empty statement
+} // no matching '{' precedes this brace
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e127m1/freeze.h b/src/Specific/solinas64_2e127m1/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e127m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Prototype: output pointer plus three uint64_t value arguments. NOTE(review): force_inline is __attribute__((always_inline)) without the `inline` keyword and only a prototype is visible here - confirm how callers are expected to get the body inlined.
diff --git a/src/Specific/solinas64_2e129m25/femul.c b/src/Specific/solinas64_2e129m25/femul.c
new file mode 100644
index 000000000..85658de00
--- /dev/null
+++ b/src/Specific/solinas64_2e129m25/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Writes three 64-bit words to out[0..2], computed from (x6, x7, x5) and (x10, x11, x9). NOTE(review): the path solinas64_2e129m25, the 43-bit masks and the 0x19 factor suggest multiplication modulo 2^129 - 25 - confirm against the generator.
+{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9))); // 128-bit sum x5*x10 + x7*x11 + x6*x9
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x19 * ((uint128_t)x6 * x10))); // 128-bit sum x5*x11 + x7*x9 + 25*x6*x10
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x19 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11)))); // 128-bit sum x5*x9 + 25*(x7*x10 + x6*x11)
+{ uint64_t x15 = (uint64_t) (x14 >> 0x2b); // x14 shifted right by 43 bits, cast to 64 bits
+{ uint64_t x16 = ((uint64_t)x14 & 0x7ffffffffff); // low 43 bits of x14
+{ uint128_t x17 = (x15 + x13); // carry from x14 added into x13
+{ uint64_t x18 = (uint64_t) (x17 >> 0x2b); // x17 shifted right by 43 bits, cast to 64 bits
+{ uint64_t x19 = ((uint64_t)x17 & 0x7ffffffffff); // low 43 bits of x17
+{ uint128_t x20 = (x18 + x12); // carry from x17 added into x12
+{ uint64_t x21 = (uint64_t) (x20 >> 0x2b); // x20 shifted right by 43 bits, cast to 64 bits
+{ uint64_t x22 = ((uint64_t)x20 & 0x7ffffffffff); // low 43 bits of x20
+{ uint64_t x23 = (x16 + (0x19 * x21)); // carry out of x20 is multiplied by 25 and added back onto the low word x16
+{ uint64_t x24 = (x23 >> 0x2b); // bits of x23 above the low 43
+{ uint64_t x25 = (x23 & 0x7ffffffffff); // low 43 bits of x23
+{ uint64_t x26 = (x24 + x19); // second-pass carry into the middle word
+{ uint64_t x27 = (x26 >> 0x2b); // bits of x26 above the low 43
+{ uint64_t x28 = (x26 & 0x7ffffffffff); // low 43 bits of x26
+out[0] = x27 + x22; // sum is stored without a further mask or carry
+out[1] = x28;
+out[2] = x25;
+}}}}}}}}}}}}}}}}} // closes every scope opened by the '{ ...' lines above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e129m25/femul.h b/src/Specific/solinas64_2e129m25/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e129m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration only; the definition in this directory's femul.c multiplies (x6, x7, x5) by (x10, x11, x9) and writes three limbs to out[0..2].
diff --git a/src/Specific/solinas64_2e129m25/fesquare.c b/src/Specific/solinas64_2e129m25/fesquare.c
new file mode 100644
index 000000000..4bc1e1a75
--- /dev/null
+++ b/src/Specific/solinas64_2e129m25/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-limb value (x3, x4, x2), x2 feeding the lowest column; stores three limbs in out[0..2], out[2] being the lowest.
+{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2))); // top column; the symmetric product x2*x3 is written out twice rather than doubled
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * ((uint128_t)x3 * x3))); // middle column; x3*x3 is folded in scaled by 0x19 = 25 -- presumably the wrap-around for the modulus 2^129 - 25 named by the directory (3 limbs x 43 bits = 129)
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x19 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4)))); // bottom column: x2*x2 plus 0x19 times both copies of x3*x4
+{ uint64_t x8 = (uint64_t) (x7 >> 0x2b); // carry out of the bottom column: bits 43 and up, truncated to 64 bits
+{ uint64_t x9 = ((uint64_t)x7 & 0x7ffffffffff); // low 43 bits of the bottom column
+{ uint128_t x10 = (x8 + x6); // middle column plus that carry, still 128-bit
+{ uint64_t x11 = (uint64_t) (x10 >> 0x2b); // carry out of the middle column
+{ uint64_t x12 = ((uint64_t)x10 & 0x7ffffffffff); // low 43 bits of the middle column
+{ uint128_t x13 = (x11 + x5); // top column plus carry
+{ uint64_t x14 = (uint64_t) (x13 >> 0x2b); // carry out of the top column
+{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffff); // low 43 bits of the top column
+{ uint64_t x16 = (x9 + (0x19 * x14)); // the top carry re-enters the bottom limb scaled by 0x19; 64-bit arithmetic from here on
+{ uint64_t x17 = (x16 >> 0x2b); // second carry out of the bottom limb
+{ uint64_t x18 = (x16 & 0x7ffffffffff); // final bottom limb, 43 bits
+{ uint64_t x19 = (x17 + x12); // middle limb plus that carry
+{ uint64_t x20 = (x19 >> 0x2b); // carry out of the middle limb
+{ uint64_t x21 = (x19 & 0x7ffffffffff); // final middle limb, 43 bits
+out[0] = x20 + x15; // top limb absorbs the last carry and is not masked again
+out[1] = x21; // middle limb
+out[2] = x18; // bottom limb
+}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e129m25/fesquare.h b/src/Specific/solinas64_2e129m25/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e129m25/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration only; the definition in this directory's fesquare.c squares (x3, x4, x2) and writes three limbs to out[0..2].
diff --git a/src/Specific/solinas64_2e129m25/freeze.c b/src/Specific/solinas64_2e129m25/freeze.c
new file mode 100644
index 000000000..30c6c2027
--- /dev/null
+++ b/src/Specific/solinas64_2e129m25/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): the lines below are not compilable C -- no opening brace follows this signature. Presumably unexpanded code-generator output; TODO regenerate and confirm the intended out[] length.
+out[0] = uint64_t x6; // NOTE(review): a declaration used as an expression; x6 is never given a value here
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 43 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C syntax; looks like a 43-bit subtract-with-borrow that was printed instead of lowered -- confirm
+out[2] = x2; // passes the x2 argument through unchanged
+out[3] = 0x7ffffffffe7;; // 0x7ffffffffe7 = 2^43 - 25; the second ';' is an empty statement
+} // the only brace in this function; it has no matching '{'
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e129m25/freeze.h b/src/Specific/solinas64_2e129m25/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e129m25/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration only. NOTE(review): the freeze.c added next to this header is not compilable C as committed -- confirm the intended out[] length there.
diff --git a/src/Specific/solinas64_2e130m5/femul.c b/src/Specific/solinas64_2e130m5/femul.c
new file mode 100644
index 000000000..db6eb8634
--- /dev/null
+++ b/src/Specific/solinas64_2e130m5/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-limb operands (x6, x7, x5) and (x10, x11, x9); x5 and x9 feed the lowest column. Stores three limbs in out[0..2], out[2] being the lowest.
+{ uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9))); // top column; the 0x2 on x7*x11 presumably absorbs one bit of misalignment from the uneven limb widths (44, 43, 43 bits per the shifts below)
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10))); // middle column; x6*x10 is folded in scaled by 0x5 -- presumably the wrap-around for the modulus 2^130 - 5 named by the directory (44 + 43 + 43 = 130)
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11))))); // bottom column: x5*x9 plus 0x5 times the doubled remaining products
+{ uint64_t x15 = (uint64_t) (x14 >> 0x2c); // carry out of the bottom column: bits 44 and up, truncated to 64 bits
+{ uint64_t x16 = ((uint64_t)x14 & 0xfffffffffff); // low 44 bits of the bottom column
+{ uint128_t x17 = (x15 + x13); // middle column plus that carry, still 128-bit
+{ uint64_t x18 = (uint64_t) (x17 >> 0x2b); // carry out of the middle column: bits 43 and up
+{ uint64_t x19 = ((uint64_t)x17 & 0x7ffffffffff); // low 43 bits of the middle column
+{ uint128_t x20 = (x18 + x12); // top column plus carry
+{ uint64_t x21 = (uint64_t) (x20 >> 0x2b); // carry out of the top column
+{ uint64_t x22 = ((uint64_t)x20 & 0x7ffffffffff); // low 43 bits of the top column
+{ uint64_t x23 = (x16 + (0x5 * x21)); // the top carry re-enters the bottom limb scaled by 0x5; 64-bit arithmetic from here on
+{ uint64_t x24 = (x23 >> 0x2c); // second carry out of the bottom limb
+{ uint64_t x25 = (x23 & 0xfffffffffff); // final bottom limb, 44 bits
+{ uint64_t x26 = (x24 + x19); // middle limb plus that carry
+{ uint64_t x27 = (x26 >> 0x2b); // carry out of the middle limb
+{ uint64_t x28 = (x26 & 0x7ffffffffff); // final middle limb, 43 bits
+out[0] = x27 + x22; // top limb absorbs the last carry and is not masked again
+out[1] = x28; // middle limb
+out[2] = x25; // bottom limb
+}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e130m5/femul.h b/src/Specific/solinas64_2e130m5/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e130m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration only; the definition in this directory's femul.c multiplies (x6, x7, x5) by (x10, x11, x9) and writes three limbs to out[0..2].
diff --git a/src/Specific/solinas64_2e130m5/fesquare.c b/src/Specific/solinas64_2e130m5/fesquare.c
new file mode 100644
index 000000000..d55560f0c
--- /dev/null
+++ b/src/Specific/solinas64_2e130m5/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-limb value (x3, x4, x2), x2 feeding the lowest column; stores three limbs in out[0..2], out[2] being the lowest.
+{ uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2))); // top column; the 0x2 on x4*x4 presumably absorbs one bit of misalignment from the uneven limb widths (44, 43, 43 bits per the shifts below)
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3))); // middle column; x3*x3 is folded in scaled by 0x5 -- presumably the wrap-around for the modulus 2^130 - 5 named by the directory (44 + 43 + 43 = 130)
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4))))); // bottom column: x2*x2 plus 0x5 times both doubled copies of x3*x4
+{ uint64_t x8 = (uint64_t) (x7 >> 0x2c); // carry out of the bottom column: bits 44 and up, truncated to 64 bits
+{ uint64_t x9 = ((uint64_t)x7 & 0xfffffffffff); // low 44 bits of the bottom column
+{ uint128_t x10 = (x8 + x6); // middle column plus that carry, still 128-bit
+{ uint64_t x11 = (uint64_t) (x10 >> 0x2b); // carry out of the middle column: bits 43 and up
+{ uint64_t x12 = ((uint64_t)x10 & 0x7ffffffffff); // low 43 bits of the middle column
+{ uint128_t x13 = (x11 + x5); // top column plus carry
+{ uint64_t x14 = (uint64_t) (x13 >> 0x2b); // carry out of the top column
+{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffff); // low 43 bits of the top column
+{ uint64_t x16 = (x9 + (0x5 * x14)); // the top carry re-enters the bottom limb scaled by 0x5; 64-bit arithmetic from here on
+{ uint64_t x17 = (x16 >> 0x2c); // second carry out of the bottom limb
+{ uint64_t x18 = (x16 & 0xfffffffffff); // final bottom limb, 44 bits
+{ uint64_t x19 = (x17 + x12); // middle limb plus that carry
+{ uint64_t x20 = (x19 >> 0x2b); // carry out of the middle limb
+{ uint64_t x21 = (x19 & 0x7ffffffffff); // final middle limb, 43 bits
+out[0] = x20 + x15; // top limb absorbs the last carry and is not masked again
+out[1] = x21; // middle limb
+out[2] = x18; // bottom limb
+}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e130m5/fesquare.h b/src/Specific/solinas64_2e130m5/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e130m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration only; the definition in this directory's fesquare.c squares (x3, x4, x2) and writes three limbs to out[0..2].
diff --git a/src/Specific/solinas64_2e130m5/freeze.c b/src/Specific/solinas64_2e130m5/freeze.c
new file mode 100644
index 000000000..3ee95bba8
--- /dev/null
+++ b/src/Specific/solinas64_2e130m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): the lines below are not compilable C -- no opening brace follows this signature. Presumably unexpanded code-generator output; TODO regenerate and confirm the intended out[] length.
+out[0] = uint64_t x6; // NOTE(review): a declaration used as an expression; x6 is never given a value here
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 44 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C syntax; looks like a 44-bit subtract-with-borrow that was printed instead of lowered -- confirm
+out[2] = x2; // passes the x2 argument through unchanged
+out[3] = 0xffffffffffb;; // 0xffffffffffb = 2^44 - 5; the second ';' is an empty statement
+} // the only brace in this function; it has no matching '{'
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e130m5/freeze.h b/src/Specific/solinas64_2e130m5/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e130m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration only. NOTE(review): the freeze.c added next to this header is not compilable C as committed -- confirm the intended out[] length there.
diff --git a/src/Specific/solinas64_2e137m13/femul.c b/src/Specific/solinas64_2e137m13/femul.c
new file mode 100644
index 000000000..085d80ee8
--- /dev/null
+++ b/src/Specific/solinas64_2e137m13/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies the 4-limb operands (x8, x9, x7, x5) and (x14, x15, x13, x11); x5 and x11 feed the lowest column. Stores four limbs in out[0..3], out[3] being the lowest.
+{ uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11)))); // top column; the 0x2 factors presumably absorb one bit of misalignment from the uneven limb widths (35, 34, 34, 34 bits per the shifts below)
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xd * ((uint128_t)x8 * x14))); // second-highest column; x8*x14 is folded in scaled by 0xd = 13 -- presumably the wrap-around for the modulus 2^137 - 13 named by the directory (35 + 34 + 34 + 34 = 137)
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xd * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); // second-lowest column: two direct products plus 0xd times two folded ones
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0xd * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13)))))); // bottom column: x5*x11 plus 0xd times the three doubled folded products
+{ uint64_t x20 = (uint64_t) (x19 >> 0x23); // carry out of the bottom column: bits 35 and up, truncated to 64 bits
+{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff); // low 35 bits of the bottom column
+{ uint128_t x22 = (x20 + x18); // second-lowest column plus that carry, still 128-bit
+{ uint64_t x23 = (uint64_t) (x22 >> 0x22); // carry out: bits 34 and up
+{ uint64_t x24 = ((uint64_t)x22 & 0x3ffffffff); // low 34 bits of the second-lowest column
+{ uint128_t x25 = (x23 + x17); // second-highest column plus carry
+{ uint64_t x26 = (uint64_t) (x25 >> 0x22); // carry out of the second-highest column
+{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffff); // low 34 bits of the second-highest column
+{ uint128_t x28 = (x26 + x16); // top column plus carry
+{ uint64_t x29 = (uint64_t) (x28 >> 0x22); // carry out of the top column
+{ uint64_t x30 = ((uint64_t)x28 & 0x3ffffffff); // low 34 bits of the top column
+{ uint64_t x31 = (x21 + (0xd * x29)); // the top carry re-enters the bottom limb scaled by 0xd; 64-bit arithmetic from here on
+{ uint64_t x32 = (x31 >> 0x23); // second carry out of the bottom limb
+{ uint64_t x33 = (x31 & 0x7ffffffff); // final bottom limb, 35 bits
+{ uint64_t x34 = (x32 + x24); // second-lowest limb plus that carry
+{ uint64_t x35 = (x34 >> 0x22); // carry out of the second-lowest limb
+{ uint64_t x36 = (x34 & 0x3ffffffff); // final second-lowest limb, 34 bits
+out[0] = x30; // top limb, already masked to 34 bits
+out[1] = x35 + x27; // second-highest limb absorbs the last carry and is not masked again
+out[2] = x36; // second-lowest limb
+out[3] = x33; // bottom limb
+}}}}}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e137m13/femul.h b/src/Specific/solinas64_2e137m13/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e137m13/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration only; the definition in this directory's femul.c multiplies (x8, x9, x7, x5) by (x14, x15, x13, x11) and writes four limbs to out[0..3].
diff --git a/src/Specific/solinas64_2e137m13/fesquare.c b/src/Specific/solinas64_2e137m13/fesquare.c
new file mode 100644
index 000000000..6858e3e57
--- /dev/null
+++ b/src/Specific/solinas64_2e137m13/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 4-limb value (x5, x6, x4, x2), x2 feeding the lowest column; stores four limbs in out[0..3], out[3] being the lowest.
+{ uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2)))); // top column; symmetric products are written out twice, and the 0x2 factors presumably absorb one bit of misalignment from the uneven limb widths (35, 34, 34, 34 bits per the shifts below)
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xd * ((uint128_t)x5 * x5))); // second-highest column; x5*x5 is folded in scaled by 0xd = 13 -- presumably the wrap-around for the modulus 2^137 - 13 named by the directory (35 + 34 + 34 + 34 = 137)
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xd * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); // second-lowest column: both copies of x2*x4 plus 0xd times both copies of x5*x6
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0xd * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4)))))); // bottom column: x2*x2 plus 0xd times the three doubled folded products
+{ uint64_t x11 = (uint64_t) (x10 >> 0x23); // carry out of the bottom column: bits 35 and up, truncated to 64 bits
+{ uint64_t x12 = ((uint64_t)x10 & 0x7ffffffff); // low 35 bits of the bottom column
+{ uint128_t x13 = (x11 + x9); // second-lowest column plus that carry, still 128-bit
+{ uint64_t x14 = (uint64_t) (x13 >> 0x22); // carry out: bits 34 and up
+{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffff); // low 34 bits of the second-lowest column
+{ uint128_t x16 = (x14 + x8); // second-highest column plus carry
+{ uint64_t x17 = (uint64_t) (x16 >> 0x22); // carry out of the second-highest column
+{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffff); // low 34 bits of the second-highest column
+{ uint128_t x19 = (x17 + x7); // top column plus carry
+{ uint64_t x20 = (uint64_t) (x19 >> 0x22); // carry out of the top column
+{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffff); // low 34 bits of the top column
+{ uint64_t x22 = (x12 + (0xd * x20)); // the top carry re-enters the bottom limb scaled by 0xd; 64-bit arithmetic from here on
+{ uint64_t x23 = (x22 >> 0x23); // second carry out of the bottom limb
+{ uint64_t x24 = (x22 & 0x7ffffffff); // final bottom limb, 35 bits
+{ uint64_t x25 = (x23 + x15); // second-lowest limb plus that carry
+{ uint64_t x26 = (x25 >> 0x22); // carry out of the second-lowest limb
+{ uint64_t x27 = (x25 & 0x3ffffffff); // final second-lowest limb, 34 bits
+out[0] = x21; // top limb, already masked to 34 bits
+out[1] = x26 + x18; // second-highest limb absorbs the last carry and is not masked again
+out[2] = x27; // second-lowest limb
+out[3] = x24; // bottom limb
+}}}}}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e137m13/fesquare.h b/src/Specific/solinas64_2e137m13/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e137m13/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only; the definition in this directory's fesquare.c squares (x5, x6, x4, x2) and writes four limbs to out[0..3].
diff --git a/src/Specific/solinas64_2e137m13/freeze.c b/src/Specific/solinas64_2e137m13/freeze.c
new file mode 100644
index 000000000..d4ed86806
--- /dev/null
+++ b/src/Specific/solinas64_2e137m13/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the lines below are not compilable C -- no opening brace follows this signature. Presumably unexpanded code-generator output; TODO regenerate and confirm the intended outputs.
+out[0] = uint64_t x8; // NOTE(review): a declaration used as an expression; x8 is never given a value here
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 35 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C syntax; looks like a 35-bit subtract-with-borrow that was printed instead of lowered -- confirm
+out[2] = x2; // passes the x2 argument through unchanged
+out[3] = 0x7fffffff3;; // 0x7fffffff3 = 2^35 - 13; the second ';' is an empty statement
+} // the only brace in this function; it has no matching '{'
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e137m13/freeze.h b/src/Specific/solinas64_2e137m13/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e137m13/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only. NOTE(review): the freeze.c added next to this header is not compilable C as committed -- confirm the intended outputs there.
diff --git a/src/Specific/solinas64_2e140m27/femul.c b/src/Specific/solinas64_2e140m27/femul.c
new file mode 100644
index 000000000..7f824389c
--- /dev/null
+++ b/src/Specific/solinas64_2e140m27/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies the 4-limb operands (x8, x9, x7, x5) and (x14, x15, x13, x11); x5 and x11 feed the lowest column. Stores four limbs in out[0..3], out[3] being the lowest.
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))); // top column: the four products that share the top limb's position
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x1b * ((uint128_t)x8 * x14))); // second-highest column; x8*x14 is folded in scaled by 0x1b = 27 -- presumably the wrap-around for the modulus 2^140 - 27 named by the directory (4 limbs x 35 bits = 140)
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1b * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); // second-lowest column: two direct products plus 0x1b times two folded ones
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x1b * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13))))); // bottom column: x5*x11 plus 0x1b times the three folded products
+{ uint64_t x20 = (uint64_t) (x19 >> 0x23); // carry out of the bottom column: bits 35 and up, truncated to 64 bits
+{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff); // low 35 bits of the bottom column
+{ uint128_t x22 = (x20 + x18); // second-lowest column plus that carry, still 128-bit
+{ uint64_t x23 = (uint64_t) (x22 >> 0x23); // carry out of the second-lowest column
+{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffff); // low 35 bits of the second-lowest column
+{ uint128_t x25 = (x23 + x17); // second-highest column plus carry
+{ uint64_t x26 = (uint64_t) (x25 >> 0x23); // carry out of the second-highest column
+{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffff); // low 35 bits of the second-highest column
+{ uint128_t x28 = (x26 + x16); // top column plus carry
+{ uint64_t x29 = (uint64_t) (x28 >> 0x23); // carry out of the top column
+{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffff); // low 35 bits of the top column
+{ uint64_t x31 = (x21 + (0x1b * x29)); // the top carry re-enters the bottom limb scaled by 0x1b; 64-bit arithmetic from here on
+{ uint64_t x32 = (x31 >> 0x23); // second carry out of the bottom limb
+{ uint64_t x33 = (x31 & 0x7ffffffff); // final bottom limb, 35 bits
+{ uint64_t x34 = (x32 + x24); // second-lowest limb plus that carry
+{ uint64_t x35 = (x34 >> 0x23); // carry out of the second-lowest limb
+{ uint64_t x36 = (x34 & 0x7ffffffff); // final second-lowest limb, 35 bits
+out[0] = x30; // top limb, already masked to 35 bits
+out[1] = x35 + x27; // second-highest limb absorbs the last carry and is not masked again
+out[2] = x36; // second-lowest limb
+out[3] = x33; // bottom limb
+}}}}}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e140m27/femul.h b/src/Specific/solinas64_2e140m27/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e140m27/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11); // Declaration only; the definition in this directory's femul.c multiplies (x8, x9, x7, x5) by (x14, x15, x13, x11) and writes four limbs to out[0..3].
diff --git a/src/Specific/solinas64_2e140m27/fesquare.c b/src/Specific/solinas64_2e140m27/fesquare.c
new file mode 100644
index 000000000..ce19267d7
--- /dev/null
+++ b/src/Specific/solinas64_2e140m27/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 4-limb value (x5, x6, x4, x2), x2 feeding the lowest column; stores four limbs in out[0..3], out[3] being the lowest.
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // top column; each symmetric product is written out twice rather than doubled
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1b * ((uint128_t)x5 * x5))); // second-highest column; x5*x5 is folded in scaled by 0x1b = 27 -- presumably the wrap-around for the modulus 2^140 - 27 named by the directory (4 limbs x 35 bits = 140)
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1b * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); // second-lowest column: both copies of x2*x4 plus 0x1b times both copies of x5*x6
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x1b * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4))))); // bottom column: x2*x2 plus 0x1b times the three folded products
+{ uint64_t x11 = (uint64_t) (x10 >> 0x23); // carry out of the bottom column: bits 35 and up, truncated to 64 bits
+{ uint64_t x12 = ((uint64_t)x10 & 0x7ffffffff); // low 35 bits of the bottom column
+{ uint128_t x13 = (x11 + x9); // second-lowest column plus that carry, still 128-bit
+{ uint64_t x14 = (uint64_t) (x13 >> 0x23); // carry out of the second-lowest column
+{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffff); // low 35 bits of the second-lowest column
+{ uint128_t x16 = (x14 + x8); // second-highest column plus carry
+{ uint64_t x17 = (uint64_t) (x16 >> 0x23); // carry out of the second-highest column
+{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffff); // low 35 bits of the second-highest column
+{ uint128_t x19 = (x17 + x7); // top column plus carry
+{ uint64_t x20 = (uint64_t) (x19 >> 0x23); // carry out of the top column
+{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffff); // low 35 bits of the top column
+{ uint64_t x22 = (x12 + (0x1b * x20)); // the top carry re-enters the bottom limb scaled by 0x1b; 64-bit arithmetic from here on
+{ uint64_t x23 = (x22 >> 0x23); // second carry out of the bottom limb
+{ uint64_t x24 = (x22 & 0x7ffffffff); // final bottom limb, 35 bits
+{ uint64_t x25 = (x23 + x15); // second-lowest limb plus that carry
+{ uint64_t x26 = (x25 >> 0x23); // carry out of the second-lowest limb
+{ uint64_t x27 = (x25 & 0x7ffffffff); // final second-lowest limb, 35 bits
+out[0] = x21; // top limb, already masked to 35 bits
+out[1] = x26 + x18; // second-highest limb absorbs the last carry and is not masked again
+out[2] = x27; // second-lowest limb
+out[3] = x24; // bottom limb
+}}}}}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e140m27/fesquare.h b/src/Specific/solinas64_2e140m27/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e140m27/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only; the definition in this directory's fesquare.c squares (x5, x6, x4, x2) and writes four limbs to out[0..3].
diff --git a/src/Specific/solinas64_2e140m27/freeze.c b/src/Specific/solinas64_2e140m27/freeze.c
new file mode 100644
index 000000000..5423b0cdf
--- /dev/null
+++ b/src/Specific/solinas64_2e140m27/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the lines below are not compilable C -- no opening brace follows this signature. Presumably unexpanded code-generator output; TODO regenerate and confirm the intended outputs.
+out[0] = uint64_t x8; // NOTE(review): a declaration used as an expression; x8 is never given a value here
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 35 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C syntax; looks like a 35-bit subtract-with-borrow that was printed instead of lowered -- confirm
+out[2] = x2; // passes the x2 argument through unchanged
+out[3] = 0x7ffffffe5;; // 0x7ffffffe5 = 2^35 - 27; the second ';' is an empty statement
+} // the only brace in this function; it has no matching '{'
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e140m27/freeze.h b/src/Specific/solinas64_2e140m27/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e140m27/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only. NOTE(review): the freeze.c added next to this header is not compilable C as committed -- confirm the intended outputs there.
diff --git a/src/Specific/solinas64_2e141m9/femul.c b/src/Specific/solinas64_2e141m9/femul.c
new file mode 100644
index 000000000..41f04986c
--- /dev/null
+++ b/src/Specific/solinas64_2e141m9/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-limb operands (x6, x7, x5) and (x10, x11, x9); x5 and x9 feed the lowest column. Stores three limbs in out[0..2], out[2] being the lowest.
+{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9))); // top column: the three products that share the top limb's position
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x9 * ((uint128_t)x6 * x10))); // middle column; x6*x10 is folded in scaled by 0x9 -- presumably the wrap-around for the modulus 2^141 - 9 named by the directory (3 limbs x 47 bits = 141)
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x9 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11)))); // bottom column: x5*x9 plus 0x9 times the two remaining products
+{ uint64_t x15 = (uint64_t) (x14 >> 0x2f); // carry out of the bottom column: bits 47 and up, truncated to 64 bits
+{ uint64_t x16 = ((uint64_t)x14 & 0x7fffffffffff); // low 47 bits of the bottom column
+{ uint128_t x17 = (x15 + x13); // middle column plus that carry, still 128-bit
+{ uint64_t x18 = (uint64_t) (x17 >> 0x2f); // carry out of the middle column
+{ uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffff); // low 47 bits of the middle column
+{ uint128_t x20 = (x18 + x12); // top column plus carry
+{ uint64_t x21 = (uint64_t) (x20 >> 0x2f); // carry out of the top column
+{ uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffff); // low 47 bits of the top column
+{ uint64_t x23 = (x16 + (0x9 * x21)); // the top carry re-enters the bottom limb scaled by 0x9; 64-bit arithmetic from here on
+{ uint64_t x24 = (x23 >> 0x2f); // second carry out of the bottom limb
+{ uint64_t x25 = (x23 & 0x7fffffffffff); // final bottom limb, 47 bits
+{ uint64_t x26 = (x24 + x19); // middle limb plus that carry
+{ uint64_t x27 = (x26 >> 0x2f); // carry out of the middle limb
+{ uint64_t x28 = (x26 & 0x7fffffffffff); // final middle limb, 47 bits
+out[0] = x27 + x22; // top limb absorbs the last carry and is not masked again
+out[1] = x28; // middle limb
+out[2] = x25; // bottom limb
+}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e141m9/femul.h b/src/Specific/solinas64_2e141m9/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e141m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration only; the definition in this directory's femul.c multiplies (x6, x7, x5) by (x10, x11, x9) and writes three limbs to out[0..2].
diff --git a/src/Specific/solinas64_2e141m9/fesquare.c b/src/Specific/solinas64_2e141m9/fesquare.c
new file mode 100644
index 000000000..9152de53a
--- /dev/null
+++ b/src/Specific/solinas64_2e141m9/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-limb value (x3, x4, x2), x2 feeding the lowest column; stores three limbs in out[0..2], out[2] being the lowest.
+{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2))); // top column; the symmetric product x2*x3 is written out twice rather than doubled
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * ((uint128_t)x3 * x3))); // middle column; x3*x3 is folded in scaled by 0x9 -- presumably the wrap-around for the modulus 2^141 - 9 named by the directory (3 limbs x 47 bits = 141)
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x9 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4)))); // bottom column: x2*x2 plus 0x9 times both copies of x3*x4
+{ uint64_t x8 = (uint64_t) (x7 >> 0x2f); // carry out of the bottom column: bits 47 and up, truncated to 64 bits
+{ uint64_t x9 = ((uint64_t)x7 & 0x7fffffffffff); // low 47 bits of the bottom column
+{ uint128_t x10 = (x8 + x6); // middle column plus that carry, still 128-bit
+{ uint64_t x11 = (uint64_t) (x10 >> 0x2f); // carry out of the middle column
+{ uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffff); // low 47 bits of the middle column
+{ uint128_t x13 = (x11 + x5); // top column plus carry
+{ uint64_t x14 = (uint64_t) (x13 >> 0x2f); // carry out of the top column
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff); // low 47 bits of the top column
+{ uint64_t x16 = (x9 + (0x9 * x14)); // the top carry re-enters the bottom limb scaled by 0x9; 64-bit arithmetic from here on
+{ uint64_t x17 = (x16 >> 0x2f); // second carry out of the bottom limb
+{ uint64_t x18 = (x16 & 0x7fffffffffff); // final bottom limb, 47 bits
+{ uint64_t x19 = (x17 + x12); // middle limb plus that carry
+{ uint64_t x20 = (x19 >> 0x2f); // carry out of the middle limb
+{ uint64_t x21 = (x19 & 0x7fffffffffff); // final middle limb, 47 bits
+out[0] = x20 + x15; // top limb absorbs the last carry and is not masked again
+out[1] = x21; // middle limb
+out[2] = x18; // bottom limb
+}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e141m9/fesquare.h b/src/Specific/solinas64_2e141m9/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e141m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration only; the definition in this directory's fesquare.c squares (x3, x4, x2) and writes three limbs to out[0..2].
diff --git a/src/Specific/solinas64_2e141m9/freeze.c b/src/Specific/solinas64_2e141m9/freeze.c
new file mode 100644
index 000000000..97be73608
--- /dev/null
+++ b/src/Specific/solinas64_2e141m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): the lines below are not compilable C -- no opening brace follows this signature. Presumably unexpanded code-generator output; TODO regenerate and confirm the intended out[] length.
+out[0] = uint64_t x6; // NOTE(review): a declaration used as an expression; x6 is never given a value here
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 47 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C syntax; looks like a 47-bit subtract-with-borrow that was printed instead of lowered -- confirm
+out[2] = x2; // passes the x2 argument through unchanged
+out[3] = 0x7ffffffffff7;; // 0x7ffffffffff7 = 2^47 - 9; the second ';' is an empty statement
+} // the only brace in this function; it has no matching '{'
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e141m9/freeze.h b/src/Specific/solinas64_2e141m9/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e141m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2); // Declaration only. NOTE(review): the freeze.c added next to this header is not compilable C as committed -- confirm the intended out[] length there.
diff --git a/src/Specific/solinas64_2e150m3/femul.c b/src/Specific/solinas64_2e150m3/femul.c
new file mode 100644
index 000000000..92776a173
--- /dev/null
+++ b/src/Specific/solinas64_2e150m3/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-limb operands (x6, x7, x5) and (x10, x11, x9); x5 and x9 feed the lowest column. Stores three limbs in out[0..2], out[2] being the lowest.
+{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9))); // top column: the three products that share the top limb's position
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x3 * ((uint128_t)x6 * x10))); // middle column; x6*x10 is folded in scaled by 0x3 -- presumably the wrap-around for the modulus 2^150 - 3 named by the directory (3 limbs x 50 bits = 150)
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x3 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11)))); // bottom column: x5*x9 plus 0x3 times the two remaining products
+{ uint64_t x15 = (uint64_t) (x14 >> 0x32); // carry out of the bottom column: bits 50 and up, truncated to 64 bits
+{ uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffff); // low 50 bits of the bottom column
+{ uint128_t x17 = (x15 + x13); // middle column plus that carry, still 128-bit
+{ uint64_t x18 = (uint64_t) (x17 >> 0x32); // carry out of the middle column
+{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffff); // low 50 bits of the middle column
+{ uint128_t x20 = (x18 + x12); // top column plus carry
+{ uint64_t x21 = (uint64_t) (x20 >> 0x32); // carry out of the top column
+{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffff); // low 50 bits of the top column
+{ uint64_t x23 = (x16 + (0x3 * x21)); // the top carry re-enters the bottom limb scaled by 0x3; 64-bit arithmetic from here on
+{ uint64_t x24 = (x23 >> 0x32); // second carry out of the bottom limb
+{ uint64_t x25 = (x23 & 0x3ffffffffffff); // final bottom limb, 50 bits
+{ uint64_t x26 = (x24 + x19); // middle limb plus that carry
+{ uint64_t x27 = (x26 >> 0x32); // carry out of the middle limb
+{ uint64_t x28 = (x26 & 0x3ffffffffffff); // final middle limb, 50 bits
+out[0] = x27 + x22; // top limb absorbs the last carry and is not masked again
+out[1] = x28; // middle limb
+out[2] = x25; // bottom limb
+}}}}}}}}}}}}}}}}} // closes every scope opened by the declarations above
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e150m3/femul.h b/src/Specific/solinas64_2e150m3/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e150m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9); // Declaration only; the definition in this directory's femul.c multiplies (x6, x7, x5) by (x10, x11, x9) and writes three limbs to out[0..2].
diff --git a/src/Specific/solinas64_2e150m3/fesquare.c b/src/Specific/solinas64_2e150m3/fesquare.c
new file mode 100644
index 000000000..43701ab9e
--- /dev/null
+++ b/src/Specific/solinas64_2e150m3/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-word operand (x3, x4, x2), listed most-significant word first, and writes 3 carried words to out. The 0x3 factors presumably reduce mod 2^150 - 3 (per the solinas64_2e150m3 path) - TODO confirm.
+{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2))); // top column; the symmetric product x2*x3 is written out twice rather than doubled
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * ((uint128_t)x3 * x3))); // middle column; the wrapped square x3*x3 is scaled by 0x3
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4)))); // lowest column; wrapped products are scaled by 0x3
+{ uint64_t x8 = (uint64_t) (x7 >> 0x32); // 0x32 = 50: everything above the low 50 bits of x7 is the carry
+{ uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffff); // low 50 bits of the lowest column
+{ uint128_t x10 = (x8 + x6); // carry into the middle column
+{ uint64_t x11 = (uint64_t) (x10 >> 0x32);
+{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffff);
+{ uint128_t x13 = (x11 + x5); // carry into the top column
+{ uint64_t x14 = (uint64_t) (x13 >> 0x32);
+{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffff);
+{ uint64_t x16 = (x9 + (0x3 * x14)); // carry out of the top column re-enters the lowest word, scaled by 0x3
+{ uint64_t x17 = (x16 >> 0x32);
+{ uint64_t x18 = (x16 & 0x3ffffffffffff);
+{ uint64_t x19 = (x17 + x12); // second carry pass: lowest word into middle word
+{ uint64_t x20 = (x19 >> 0x32);
+{ uint64_t x21 = (x19 & 0x3ffffffffffff);
+out[0] = x20 + x15; // top word absorbs the final carry and is not masked again
+out[1] = x21; // middle word, masked to 50 bits
+out[2] = x18; // lowest word, masked to 50 bits
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e150m3/fesquare.h b/src/Specific/solinas64_2e150m3/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e150m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e150m3/freeze.c b/src/Specific/solinas64_2e150m3/freeze.c
new file mode 100644
index 000000000..5e3a4ce07
--- /dev/null
+++ b/src/Specific/solinas64_2e150m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and the statements look like unprinted code-generator output, so freeze.c cannot compile as committed. Regenerate instead of hand-editing.
+out[0] = uint64_t x6; // a declaration used as an expression; x6 is never defined
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "SubWithGetBorrow 50" term emitted verbatim; presumably a 50-bit subtract-with-borrow the printer could not render - confirm
+out[2] = x2; // x3 and x4 are never read in this body
+out[3] = 0x3fffffffffffd;; // stray second ';'; writes a fourth slot although only three words are passed in
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e150m3/freeze.h b/src/Specific/solinas64_2e150m3/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e150m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e150m5/femul.c b/src/Specific/solinas64_2e150m5/femul.c
new file mode 100644
index 000000000..959821713
--- /dev/null
+++ b/src/Specific/solinas64_2e150m5/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-word operands (x6, x7, x5) and (x10, x11, x9), listed most-significant word first, and writes 3 carried words to out. The 0x5 factors presumably reduce mod 2^150 - 5 (per the solinas64_2e150m5 path) - TODO confirm.
+{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9))); // top column: 128-bit sum of the three products that land in the highest word
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10))); // middle column; the wrapped product x6*x10 is scaled by 0x5
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11)))); // lowest column; both wrapped products are scaled by 0x5
+{ uint64_t x15 = (uint64_t) (x14 >> 0x32); // 0x32 = 50: everything above the low 50 bits of x14 is the carry
+{ uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffff); // low 50 bits of the lowest column
+{ uint128_t x17 = (x15 + x13); // carry into the middle column
+{ uint64_t x18 = (uint64_t) (x17 >> 0x32);
+{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffff);
+{ uint128_t x20 = (x18 + x12); // carry into the top column
+{ uint64_t x21 = (uint64_t) (x20 >> 0x32);
+{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffff);
+{ uint64_t x23 = (x16 + (0x5 * x21)); // carry out of the top column re-enters the lowest word, scaled by 0x5
+{ uint64_t x24 = (x23 >> 0x32);
+{ uint64_t x25 = (x23 & 0x3ffffffffffff);
+{ uint64_t x26 = (x24 + x19); // second carry pass: lowest word into middle word
+{ uint64_t x27 = (x26 >> 0x32);
+{ uint64_t x28 = (x26 & 0x3ffffffffffff);
+out[0] = x27 + x22; // top word absorbs the final carry and is not masked again
+out[1] = x28; // middle word, masked to 50 bits
+out[2] = x25; // lowest word, masked to 50 bits
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e150m5/femul.h b/src/Specific/solinas64_2e150m5/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e150m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/solinas64_2e150m5/fesquare.c b/src/Specific/solinas64_2e150m5/fesquare.c
new file mode 100644
index 000000000..46ebd9428
--- /dev/null
+++ b/src/Specific/solinas64_2e150m5/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-word operand (x3, x4, x2), listed most-significant word first, and writes 3 carried words to out. The 0x5 factors presumably reduce mod 2^150 - 5 (per the solinas64_2e150m5 path) - TODO confirm.
+{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2))); // top column; the symmetric product x2*x3 is written out twice rather than doubled
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3))); // middle column; the wrapped square x3*x3 is scaled by 0x5
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4)))); // lowest column; wrapped products are scaled by 0x5
+{ uint64_t x8 = (uint64_t) (x7 >> 0x32); // 0x32 = 50: everything above the low 50 bits of x7 is the carry
+{ uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffff); // low 50 bits of the lowest column
+{ uint128_t x10 = (x8 + x6); // carry into the middle column
+{ uint64_t x11 = (uint64_t) (x10 >> 0x32);
+{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffff);
+{ uint128_t x13 = (x11 + x5); // carry into the top column
+{ uint64_t x14 = (uint64_t) (x13 >> 0x32);
+{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffff);
+{ uint64_t x16 = (x9 + (0x5 * x14)); // carry out of the top column re-enters the lowest word, scaled by 0x5
+{ uint64_t x17 = (x16 >> 0x32);
+{ uint64_t x18 = (x16 & 0x3ffffffffffff);
+{ uint64_t x19 = (x17 + x12); // second carry pass: lowest word into middle word
+{ uint64_t x20 = (x19 >> 0x32);
+{ uint64_t x21 = (x19 & 0x3ffffffffffff);
+out[0] = x20 + x15; // top word absorbs the final carry and is not masked again
+out[1] = x21; // middle word, masked to 50 bits
+out[2] = x18; // lowest word, masked to 50 bits
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e150m5/fesquare.h b/src/Specific/solinas64_2e150m5/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e150m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e150m5/freeze.c b/src/Specific/solinas64_2e150m5/freeze.c
new file mode 100644
index 000000000..b6f47f0b9
--- /dev/null
+++ b/src/Specific/solinas64_2e150m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and the statements look like unprinted code-generator output, so freeze.c cannot compile as committed. Regenerate instead of hand-editing.
+out[0] = uint64_t x6; // a declaration used as an expression; x6 is never defined
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "SubWithGetBorrow 50" term emitted verbatim; presumably a 50-bit subtract-with-borrow the printer could not render - confirm
+out[2] = x2; // x3 and x4 are never read in this body
+out[3] = 0x3fffffffffffb;; // stray second ';'; writes a fourth slot although only three words are passed in
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e150m5/freeze.h b/src/Specific/solinas64_2e150m5/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e150m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e152m17/femul.c b/src/Specific/solinas64_2e152m17/femul.c
new file mode 100644
index 000000000..d3005d0da
--- /dev/null
+++ b/src/Specific/solinas64_2e152m17/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies the 4-word operands (x8, x9, x7, x5) and (x14, x15, x13, x11), listed most-significant word first, and writes 4 carried words to out. The 0x11 factors presumably reduce mod 2^152 - 17 (per the solinas64_2e152m17 path) - TODO confirm.
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))); // top column: 128-bit sum of the four products that land in the highest word
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x11 * ((uint128_t)x8 * x14))); // third column; the wrapped product x8*x14 is scaled by 0x11
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x11 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); // second column; two wrapped products scaled by 0x11
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x11 * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13))))); // lowest column; three wrapped products scaled by 0x11
+{ uint64_t x20 = (uint64_t) (x19 >> 0x26); // 0x26 = 38: everything above the low 38 bits of x19 is the carry
+{ uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff); // low 38 bits of the lowest column
+{ uint128_t x22 = (x20 + x18); // carry into the second column
+{ uint64_t x23 = (uint64_t) (x22 >> 0x26);
+{ uint64_t x24 = ((uint64_t)x22 & 0x3fffffffff);
+{ uint128_t x25 = (x23 + x17); // carry into the third column
+{ uint64_t x26 = (uint64_t) (x25 >> 0x26);
+{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffff);
+{ uint128_t x28 = (x26 + x16); // carry into the top column
+{ uint64_t x29 = (uint64_t) (x28 >> 0x26);
+{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffff);
+{ uint64_t x31 = (x21 + (0x11 * x29)); // carry out of the top column re-enters the lowest word, scaled by 0x11
+{ uint64_t x32 = (x31 >> 0x26);
+{ uint64_t x33 = (x31 & 0x3fffffffff);
+{ uint64_t x34 = (x32 + x24); // second carry pass: lowest word into second word
+{ uint64_t x35 = (x34 >> 0x26);
+{ uint64_t x36 = (x34 & 0x3fffffffff);
+out[0] = x30; // top word, masked to 38 bits
+out[1] = x35 + x27; // third word absorbs the final carry and is not masked again
+out[2] = x36; // second word, masked to 38 bits
+out[3] = x33; // lowest word, masked to 38 bits
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e152m17/femul.h b/src/Specific/solinas64_2e152m17/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e152m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e152m17/fesquare.c b/src/Specific/solinas64_2e152m17/fesquare.c
new file mode 100644
index 000000000..f265f13d1
--- /dev/null
+++ b/src/Specific/solinas64_2e152m17/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 4-word operand (x5, x6, x4, x2), listed most-significant word first, and writes 4 carried words to out. The 0x11 factors presumably reduce mod 2^152 - 17 (per the solinas64_2e152m17 path) - TODO confirm.
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // top column; symmetric products are written out twice rather than doubled
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * ((uint128_t)x5 * x5))); // third column; the wrapped square x5*x5 is scaled by 0x11
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); // second column; two wrapped products scaled by 0x11
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4))))); // lowest column; three wrapped products scaled by 0x11
+{ uint64_t x11 = (uint64_t) (x10 >> 0x26); // 0x26 = 38: everything above the low 38 bits of x10 is the carry
+{ uint64_t x12 = ((uint64_t)x10 & 0x3fffffffff); // low 38 bits of the lowest column
+{ uint128_t x13 = (x11 + x9); // carry into the second column
+{ uint64_t x14 = (uint64_t) (x13 >> 0x26);
+{ uint64_t x15 = ((uint64_t)x13 & 0x3fffffffff);
+{ uint128_t x16 = (x14 + x8); // carry into the third column
+{ uint64_t x17 = (uint64_t) (x16 >> 0x26);
+{ uint64_t x18 = ((uint64_t)x16 & 0x3fffffffff);
+{ uint128_t x19 = (x17 + x7); // carry into the top column
+{ uint64_t x20 = (uint64_t) (x19 >> 0x26);
+{ uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff);
+{ uint64_t x22 = (x12 + (0x11 * x20)); // carry out of the top column re-enters the lowest word, scaled by 0x11
+{ uint64_t x23 = (x22 >> 0x26);
+{ uint64_t x24 = (x22 & 0x3fffffffff);
+{ uint64_t x25 = (x23 + x15); // second carry pass: lowest word into second word
+{ uint64_t x26 = (x25 >> 0x26);
+{ uint64_t x27 = (x25 & 0x3fffffffff);
+out[0] = x21; // top word, masked to 38 bits
+out[1] = x26 + x18; // third word absorbs the final carry and is not masked again
+out[2] = x27; // second word, masked to 38 bits
+out[3] = x24; // lowest word, masked to 38 bits
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e152m17/fesquare.h b/src/Specific/solinas64_2e152m17/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e152m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e152m17/freeze.c b/src/Specific/solinas64_2e152m17/freeze.c
new file mode 100644
index 000000000..7ca849a69
--- /dev/null
+++ b/src/Specific/solinas64_2e152m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and the statements look like unprinted code-generator output, so freeze.c cannot compile as committed. Regenerate instead of hand-editing.
+out[0] = uint64_t x8; // a declaration used as an expression; x8 is never defined
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 38 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "SubWithGetBorrow 38" term emitted verbatim; presumably a 38-bit subtract-with-borrow the printer could not render - confirm
+out[2] = x2; // x5, x6 and x4 are never read in this body
+out[3] = 0x3fffffffef;; // stray second ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e152m17/freeze.h b/src/Specific/solinas64_2e152m17/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e152m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e158m15/femul.c b/src/Specific/solinas64_2e158m15/femul.c
new file mode 100644
index 000000000..fe126f9a5
--- /dev/null
+++ b/src/Specific/solinas64_2e158m15/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies the 4-word operands (x8, x9, x7, x5) and (x14, x15, x13, x11), listed most-significant word first, and writes 4 carried words to out. Word widths alternate 40/39 bits from the lowest word up; the 0xf factors presumably reduce mod 2^158 - 15 (per the solinas64_2e158m15 path) - TODO confirm.
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))); // top column: 128-bit sum of the four products that land in the highest word
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xf * (0x2 * ((uint128_t)x8 * x14)))); // third column; the extra 0x2 on some products presumably compensates for the uneven 40/39-bit word widths - confirm
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xf * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); // second column; two wrapped products scaled by 0xf
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0xf * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13)))))); // lowest column; three wrapped products scaled by 0xf, two of them also by 0x2
+{ uint64_t x20 = (uint64_t) (x19 >> 0x28); // 0x28 = 40: the lowest word keeps 40 bits
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffff); // low 40 bits of the lowest column
+{ uint128_t x22 = (x20 + x18); // carry into the second column
+{ uint64_t x23 = (uint64_t) (x22 >> 0x27); // 0x27 = 39: the second word keeps 39 bits
+{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffff);
+{ uint128_t x25 = (x23 + x17); // carry into the third column
+{ uint64_t x26 = (uint64_t) (x25 >> 0x28); // third word keeps 40 bits
+{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffff);
+{ uint128_t x28 = (x26 + x16); // carry into the top column
+{ uint64_t x29 = (uint64_t) (x28 >> 0x27); // top word keeps 39 bits
+{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffff);
+{ uint64_t x31 = (x21 + (0xf * x29)); // carry out of the top column re-enters the lowest word, scaled by 0xf
+{ uint64_t x32 = (x31 >> 0x28);
+{ uint64_t x33 = (x31 & 0xffffffffff);
+{ uint64_t x34 = (x32 + x24); // second carry pass: lowest word into second word
+{ uint64_t x35 = (x34 >> 0x27);
+{ uint64_t x36 = (x34 & 0x7fffffffff);
+out[0] = x30; // top word, masked to 39 bits
+out[1] = x35 + x27; // third word absorbs the final carry and is not masked again
+out[2] = x36; // second word, masked to 39 bits
+out[3] = x33; // lowest word, masked to 40 bits
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e158m15/femul.h b/src/Specific/solinas64_2e158m15/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e158m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e158m15/fesquare.c b/src/Specific/solinas64_2e158m15/fesquare.c
new file mode 100644
index 000000000..a400d508b
--- /dev/null
+++ b/src/Specific/solinas64_2e158m15/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 4-word operand (x5, x6, x4, x2), listed most-significant word first, and writes 4 carried words to out. Word widths alternate 40/39 bits from the lowest word up; the 0xf factors presumably reduce mod 2^158 - 15 (per the solinas64_2e158m15 path) - TODO confirm.
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // top column; symmetric products are written out twice rather than doubled
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xf * (0x2 * ((uint128_t)x5 * x5)))); // third column; the extra 0x2 on the squares presumably compensates for the uneven 40/39-bit word widths - confirm
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); // second column; two wrapped products scaled by 0xf
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0xf * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4)))))); // lowest column; three wrapped products scaled by 0xf, two of them also by 0x2
+{ uint64_t x11 = (uint64_t) (x10 >> 0x28); // 0x28 = 40: the lowest word keeps 40 bits
+{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffff); // low 40 bits of the lowest column
+{ uint128_t x13 = (x11 + x9); // carry into the second column
+{ uint64_t x14 = (uint64_t) (x13 >> 0x27); // 0x27 = 39: the second word keeps 39 bits
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffff);
+{ uint128_t x16 = (x14 + x8); // carry into the third column
+{ uint64_t x17 = (uint64_t) (x16 >> 0x28); // third word keeps 40 bits
+{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffff);
+{ uint128_t x19 = (x17 + x7); // carry into the top column
+{ uint64_t x20 = (uint64_t) (x19 >> 0x27); // top word keeps 39 bits
+{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffff);
+{ uint64_t x22 = (x12 + (0xf * x20)); // carry out of the top column re-enters the lowest word, scaled by 0xf
+{ uint64_t x23 = (x22 >> 0x28);
+{ uint64_t x24 = (x22 & 0xffffffffff);
+{ uint64_t x25 = (x23 + x15); // second carry pass: lowest word into second word
+{ uint64_t x26 = (x25 >> 0x27);
+{ uint64_t x27 = (x25 & 0x7fffffffff);
+out[0] = x21; // top word, masked to 39 bits
+out[1] = x26 + x18; // third word absorbs the final carry and is not masked again
+out[2] = x27; // second word, masked to 39 bits
+out[3] = x24; // lowest word, masked to 40 bits
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e158m15/fesquare.h b/src/Specific/solinas64_2e158m15/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e158m15/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e158m15/freeze.c b/src/Specific/solinas64_2e158m15/freeze.c
new file mode 100644
index 000000000..46dd0dec9
--- /dev/null
+++ b/src/Specific/solinas64_2e158m15/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and the statements look like unprinted code-generator output, so freeze.c cannot compile as committed. Regenerate instead of hand-editing.
+out[0] = uint64_t x8; // a declaration used as an expression; x8 is never defined
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 40 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "SubWithGetBorrow 40" term emitted verbatim; presumably a 40-bit subtract-with-borrow the printer could not render - confirm
+out[2] = x2; // x5, x6 and x4 are never read in this body
+out[3] = 0xfffffffff1;; // stray second ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e158m15/freeze.h b/src/Specific/solinas64_2e158m15/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e158m15/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e165m25/femul.c b/src/Specific/solinas64_2e165m25/femul.c
new file mode 100644
index 000000000..a737e0bac
--- /dev/null
+++ b/src/Specific/solinas64_2e165m25/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-word operands (x6, x7, x5) and (x10, x11, x9), listed most-significant word first, and writes 3 carried words to out. The 0x19 factors presumably reduce mod 2^165 - 25 (per the solinas64_2e165m25 path) - TODO confirm.
+{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9))); // top column: 128-bit sum of the three products that land in the highest word
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x19 * ((uint128_t)x6 * x10))); // middle column; the wrapped product x6*x10 is scaled by 0x19
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x19 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11)))); // lowest column; both wrapped products are scaled by 0x19
+{ uint128_t x15 = (x14 >> 0x37); // 0x37 = 55; this first carry stays 128-bit (no truncating cast, unlike the later carries)
+{ uint64_t x16 = ((uint64_t)x14 & 0x7fffffffffffff); // low 55 bits of the lowest column
+{ uint128_t x17 = (x15 + x13); // carry into the middle column
+{ uint64_t x18 = (uint64_t) (x17 >> 0x37);
+{ uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffffff);
+{ uint128_t x20 = (x18 + x12); // carry into the top column
+{ uint64_t x21 = (uint64_t) (x20 >> 0x37);
+{ uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffffff);
+{ uint128_t x23 = (x16 + ((uint128_t)0x19 * x21)); // top-column carry re-enters the lowest word scaled by 0x19; the multiply is widened to 128 bits
+{ uint64_t x24 = (uint64_t) (x23 >> 0x37);
+{ uint64_t x25 = ((uint64_t)x23 & 0x7fffffffffffff);
+{ uint64_t x26 = (x24 + x19); // second carry pass: lowest word into middle word
+{ uint64_t x27 = (x26 >> 0x37);
+{ uint64_t x28 = (x26 & 0x7fffffffffffff);
+out[0] = x27 + x22; // top word absorbs the final carry and is not masked again
+out[1] = x28; // middle word, masked to 55 bits
+out[2] = x25; // lowest word, masked to 55 bits
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e165m25/femul.h b/src/Specific/solinas64_2e165m25/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e165m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/solinas64_2e165m25/fesquare.c b/src/Specific/solinas64_2e165m25/fesquare.c
new file mode 100644
index 000000000..34b1fe082
--- /dev/null
+++ b/src/Specific/solinas64_2e165m25/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-word operand (x3, x4, x2), listed most-significant word first, and writes 3 carried words to out. The 0x19 factors presumably reduce mod 2^165 - 25 (per the solinas64_2e165m25 path) - TODO confirm.
+{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2))); // top column; the symmetric product x2*x3 is written out twice rather than doubled
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * ((uint128_t)x3 * x3))); // middle column; the wrapped square x3*x3 is scaled by 0x19
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x19 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4)))); // lowest column; wrapped products are scaled by 0x19
+{ uint128_t x8 = (x7 >> 0x37); // 0x37 = 55; this first carry stays 128-bit (no truncating cast, unlike the later carries)
+{ uint64_t x9 = ((uint64_t)x7 & 0x7fffffffffffff); // low 55 bits of the lowest column
+{ uint128_t x10 = (x8 + x6); // carry into the middle column
+{ uint64_t x11 = (uint64_t) (x10 >> 0x37);
+{ uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffffff);
+{ uint128_t x13 = (x11 + x5); // carry into the top column
+{ uint64_t x14 = (uint64_t) (x13 >> 0x37);
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
+{ uint128_t x16 = (x9 + ((uint128_t)0x19 * x14)); // top-column carry re-enters the lowest word scaled by 0x19; the multiply is widened to 128 bits
+{ uint64_t x17 = (uint64_t) (x16 >> 0x37);
+{ uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffffff);
+{ uint64_t x19 = (x17 + x12); // second carry pass: lowest word into middle word
+{ uint64_t x20 = (x19 >> 0x37);
+{ uint64_t x21 = (x19 & 0x7fffffffffffff);
+out[0] = x20 + x15; // top word absorbs the final carry and is not masked again
+out[1] = x21; // middle word, masked to 55 bits
+out[2] = x18; // lowest word, masked to 55 bits
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e165m25/fesquare.h b/src/Specific/solinas64_2e165m25/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e165m25/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e165m25/freeze.c b/src/Specific/solinas64_2e165m25/freeze.c
new file mode 100644
index 000000000..8204e0075
--- /dev/null
+++ b/src/Specific/solinas64_2e165m25/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and the statements look like unprinted code-generator output, so freeze.c cannot compile as committed. Regenerate instead of hand-editing.
+out[0] = uint64_t x6; // a declaration used as an expression; x6 is never defined
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 55 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "SubWithGetBorrow 55" term emitted verbatim; presumably a 55-bit subtract-with-borrow the printer could not render - confirm
+out[2] = x2; // x3 and x4 are never read in this body
+out[3] = 0x7fffffffffffe7;; // stray second ';'; writes a fourth slot although only three words are passed in
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e165m25/freeze.h b/src/Specific/solinas64_2e165m25/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e165m25/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e166m5/femul.c b/src/Specific/solinas64_2e166m5/femul.c
new file mode 100644
index 000000000..77c28c681
--- /dev/null
+++ b/src/Specific/solinas64_2e166m5/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-word operands (x6, x7, x5) and (x10, x11, x9), listed most-significant word first, and writes 3 carried words to out. The lowest word keeps 56 bits and the other two 55; the 0x5 factors presumably reduce mod 2^166 - 5 (per the solinas64_2e166m5 path) - TODO confirm.
+{ uint128_t x12 = (((uint128_t)x5 * x10) + ((0x2 * ((uint128_t)x7 * x11)) + ((uint128_t)x6 * x9))); // top column; the extra 0x2 on x7*x11 presumably compensates for the uneven 56/55-bit word widths - confirm
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x5 * ((uint128_t)x6 * x10))); // middle column; the wrapped product x6*x10 is scaled by 0x5
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x5 * ((0x2 * ((uint128_t)x7 * x10)) + (0x2 * ((uint128_t)x6 * x11))))); // lowest column; both wrapped products are scaled by 0x5 and by 0x2
+{ uint64_t x15 = (uint64_t) (x14 >> 0x38); // 0x38 = 56: the lowest word keeps 56 bits
+{ uint64_t x16 = ((uint64_t)x14 & 0xffffffffffffff); // low 56 bits of the lowest column
+{ uint128_t x17 = (x15 + x13); // carry into the middle column
+{ uint64_t x18 = (uint64_t) (x17 >> 0x37); // 0x37 = 55: the middle word keeps 55 bits
+{ uint64_t x19 = ((uint64_t)x17 & 0x7fffffffffffff);
+{ uint128_t x20 = (x18 + x12); // carry into the top column
+{ uint64_t x21 = (uint64_t) (x20 >> 0x37); // top word keeps 55 bits
+{ uint64_t x22 = ((uint64_t)x20 & 0x7fffffffffffff);
+{ uint64_t x23 = (x16 + (0x5 * x21)); // carry out of the top column re-enters the lowest word, scaled by 0x5
+{ uint64_t x24 = (x23 >> 0x38);
+{ uint64_t x25 = (x23 & 0xffffffffffffff);
+{ uint64_t x26 = (x24 + x19); // second carry pass: lowest word into middle word
+{ uint64_t x27 = (x26 >> 0x37);
+{ uint64_t x28 = (x26 & 0x7fffffffffffff);
+out[0] = x27 + x22; // top word absorbs the final carry and is not masked again
+out[1] = x28; // middle word, masked to 55 bits
+out[2] = x25; // lowest word, masked to 56 bits
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e166m5/femul.h b/src/Specific/solinas64_2e166m5/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e166m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/solinas64_2e166m5/fesquare.c b/src/Specific/solinas64_2e166m5/fesquare.c
new file mode 100644
index 000000000..983b03769
--- /dev/null
+++ b/src/Specific/solinas64_2e166m5/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-limb operand (x3, x4, x2), folds the carries, and stores 3 limbs in out; presumably arithmetic mod 2^166 - 5 per the directory name (TODO confirm).
+{ uint128_t x5 = (((uint128_t)x2 * x3) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x3 * x2))); // column that ends the carry chain (feeds out[0]); the symmetric x2*x3 / x3*x2 terms are written out twice, not merged
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * ((uint128_t)x3 * x3))); // middle column; the x3*x3 term is scaled by 5
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x3)) + (0x2 * ((uint128_t)x3 * x4))))); // column that starts the carry chain (feeds out[2])
+{ uint64_t x8 = (uint64_t) (x7 >> 0x38); // carry: everything above the low 56 bits of x7
+{ uint64_t x9 = ((uint64_t)x7 & 0xffffffffffffff); // low 56 bits of x7
+{ uint128_t x10 = (x8 + x6);
+{ uint64_t x11 = (uint64_t) (x10 >> 0x37); // carry above the low 55 bits
+{ uint64_t x12 = ((uint64_t)x10 & 0x7fffffffffffff); // low 55 bits
+{ uint128_t x13 = (x11 + x5);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x37); // carry out of the last column
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
+{ uint64_t x16 = (x9 + (0x5 * x14)); // the last carry is multiplied by 5 and added back into the first column
+{ uint64_t x17 = (x16 >> 0x38);
+{ uint64_t x18 = (x16 & 0xffffffffffffff);
+{ uint64_t x19 = (x17 + x12); // second, shorter carry pass
+{ uint64_t x20 = (x19 >> 0x37);
+{ uint64_t x21 = (x19 & 0x7fffffffffffff);
+out[0] = x20 + x15; // no mask is applied after this final addition
+out[1] = x21;
+out[2] = x18;
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e166m5/fesquare.h b/src/Specific/solinas64_2e166m5/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e166m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e166m5/freeze.c b/src/Specific/solinas64_2e166m5/freeze.c
new file mode 100644
index 000000000..1777c507c
--- /dev/null
+++ b/src/Specific/solinas64_2e166m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' ends the block; as written this text does not compile as C.
+out[0] = uint64_t x6; // NOTE(review): a declaration on the right of '=' is not a C expression, and x6 is given no value here
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted generator term rather than C; presumably a code-printer gap, confirm with the generator
+out[2] = x2;
+out[3] = 0xfffffffffffffb;; // note the doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e166m5/freeze.h b/src/Specific/solinas64_2e166m5/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e166m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e171m19/femul.c b/src/Specific/solinas64_2e171m19/femul.c
new file mode 100644
index 000000000..a8f1a1a43
--- /dev/null
+++ b/src/Specific/solinas64_2e171m19/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-limb operands (x6, x7, x5) and (x10, x11, x9), folds the carries, and stores 3 limbs in out; presumably arithmetic mod 2^171 - 19 per the directory name (TODO confirm).
+{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9))); // column that ends the carry chain (feeds out[0])
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x13 * ((uint128_t)x6 * x10))); // middle column; the x6*x10 term is scaled by 0x13 (19)
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x13 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11)))); // column that starts the carry chain (feeds out[2])
+{ uint128_t x15 = (x14 >> 0x39); // carry above the low 57 bits, kept at 128-bit width
+{ uint64_t x16 = ((uint64_t)x14 & 0x1ffffffffffffff); // low 57 bits of x14
+{ uint128_t x17 = (x15 + x13);
+{ uint128_t x18 = (x17 >> 0x39); // carry, again kept at 128-bit width
+{ uint64_t x19 = ((uint64_t)x17 & 0x1ffffffffffffff);
+{ uint128_t x20 = (x18 + x12);
+{ uint64_t x21 = (uint64_t) (x20 >> 0x39); // carry out of the last column
+{ uint64_t x22 = ((uint64_t)x20 & 0x1ffffffffffffff);
+{ uint128_t x23 = (x16 + ((uint128_t)0x13 * x21)); // the last carry is multiplied by 19 (in 128-bit arithmetic) and added back into the first column
+{ uint64_t x24 = (uint64_t) (x23 >> 0x39);
+{ uint64_t x25 = ((uint64_t)x23 & 0x1ffffffffffffff);
+{ uint64_t x26 = (x24 + x19); // second, shorter carry pass
+{ uint64_t x27 = (x26 >> 0x39);
+{ uint64_t x28 = (x26 & 0x1ffffffffffffff);
+out[0] = x27 + x22; // no mask is applied after this final addition
+out[1] = x28;
+out[2] = x25;
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e171m19/femul.h b/src/Specific/solinas64_2e171m19/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e171m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/solinas64_2e171m19/fesquare.c b/src/Specific/solinas64_2e171m19/fesquare.c
new file mode 100644
index 000000000..2b64141cb
--- /dev/null
+++ b/src/Specific/solinas64_2e171m19/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-limb operand (x3, x4, x2), folds the carries, and stores 3 limbs in out; presumably arithmetic mod 2^171 - 19 per the directory name (TODO confirm).
+{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2))); // column that ends the carry chain (feeds out[0]); symmetric terms are written out twice, not merged
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * ((uint128_t)x3 * x3))); // middle column; the x3*x3 term is scaled by 0x13 (19)
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x13 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4)))); // column that starts the carry chain (feeds out[2])
+{ uint128_t x8 = (x7 >> 0x39); // carry above the low 57 bits, kept at 128-bit width
+{ uint64_t x9 = ((uint64_t)x7 & 0x1ffffffffffffff); // low 57 bits of x7
+{ uint128_t x10 = (x8 + x6);
+{ uint128_t x11 = (x10 >> 0x39); // carry, again kept at 128-bit width
+{ uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffffff);
+{ uint128_t x13 = (x11 + x5);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x39); // carry out of the last column
+{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff);
+{ uint128_t x16 = (x9 + ((uint128_t)0x13 * x14)); // the last carry is multiplied by 19 (in 128-bit arithmetic) and added back into the first column
+{ uint64_t x17 = (uint64_t) (x16 >> 0x39);
+{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
+{ uint64_t x19 = (x17 + x12); // second, shorter carry pass
+{ uint64_t x20 = (x19 >> 0x39);
+{ uint64_t x21 = (x19 & 0x1ffffffffffffff);
+out[0] = x20 + x15; // no mask is applied after this final addition
+out[1] = x21;
+out[2] = x18;
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e171m19/fesquare.h b/src/Specific/solinas64_2e171m19/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e171m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e171m19/freeze.c b/src/Specific/solinas64_2e171m19/freeze.c
new file mode 100644
index 000000000..d394e0b1a
--- /dev/null
+++ b/src/Specific/solinas64_2e171m19/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' ends the block; as written this text does not compile as C.
+out[0] = uint64_t x6; // NOTE(review): a declaration on the right of '=' is not a C expression, and x6 is given no value here
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted generator term rather than C; presumably a code-printer gap, confirm with the generator
+out[2] = x2;
+out[3] = 0x1ffffffffffffed;; // note the doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e171m19/freeze.h b/src/Specific/solinas64_2e171m19/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e171m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e174m17/femul.c b/src/Specific/solinas64_2e174m17/femul.c
new file mode 100644
index 000000000..23271b84a
--- /dev/null
+++ b/src/Specific/solinas64_2e174m17/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-limb operands (x6, x7, x5) and (x10, x11, x9), folds the carries, and stores 3 limbs in out; presumably arithmetic mod 2^174 - 17 per the directory name (TODO confirm).
+{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9))); // column that ends the carry chain (feeds out[0])
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x11 * ((uint128_t)x6 * x10))); // middle column; the x6*x10 term is scaled by 0x11 (17)
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x11 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11)))); // column that starts the carry chain (feeds out[2])
+{ uint128_t x15 = (x14 >> 0x3a); // carry above the low 58 bits, kept at 128-bit width
+{ uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffffff); // low 58 bits of x14
+{ uint128_t x17 = (x15 + x13);
+{ uint128_t x18 = (x17 >> 0x3a); // carry, again kept at 128-bit width
+{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffffff);
+{ uint128_t x20 = (x18 + x12);
+{ uint64_t x21 = (uint64_t) (x20 >> 0x3a); // carry out of the last column
+{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffffff);
+{ uint128_t x23 = (x16 + ((uint128_t)0x11 * x21)); // the last carry is multiplied by 17 (in 128-bit arithmetic) and added back into the first column
+{ uint64_t x24 = (uint64_t) (x23 >> 0x3a);
+{ uint64_t x25 = ((uint64_t)x23 & 0x3ffffffffffffff);
+{ uint64_t x26 = (x24 + x19); // second, shorter carry pass
+{ uint64_t x27 = (x26 >> 0x3a);
+{ uint64_t x28 = (x26 & 0x3ffffffffffffff);
+out[0] = x27 + x22; // no mask is applied after this final addition
+out[1] = x28;
+out[2] = x25;
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e174m17/femul.h b/src/Specific/solinas64_2e174m17/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e174m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/solinas64_2e174m17/fesquare.c b/src/Specific/solinas64_2e174m17/fesquare.c
new file mode 100644
index 000000000..1df201c99
--- /dev/null
+++ b/src/Specific/solinas64_2e174m17/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-limb operand (x3, x4, x2), folds the carries, and stores 3 limbs in out; presumably arithmetic mod 2^174 - 17 per the directory name (TODO confirm).
+{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2))); // column that ends the carry chain (feeds out[0]); symmetric terms are written out twice, not merged
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * ((uint128_t)x3 * x3))); // middle column; the x3*x3 term is scaled by 0x11 (17)
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4)))); // column that starts the carry chain (feeds out[2])
+{ uint128_t x8 = (x7 >> 0x3a); // carry above the low 58 bits, kept at 128-bit width
+{ uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffffff); // low 58 bits of x7
+{ uint128_t x10 = (x8 + x6);
+{ uint128_t x11 = (x10 >> 0x3a); // carry, again kept at 128-bit width
+{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff);
+{ uint128_t x13 = (x11 + x5);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x3a); // carry out of the last column
+{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffffff);
+{ uint128_t x16 = (x9 + ((uint128_t)0x11 * x14)); // the last carry is multiplied by 17 (in 128-bit arithmetic) and added back into the first column
+{ uint64_t x17 = (uint64_t) (x16 >> 0x3a);
+{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
+{ uint64_t x19 = (x17 + x12); // second, shorter carry pass
+{ uint64_t x20 = (x19 >> 0x3a);
+{ uint64_t x21 = (x19 & 0x3ffffffffffffff);
+out[0] = x20 + x15; // no mask is applied after this final addition
+out[1] = x21;
+out[2] = x18;
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e174m17/fesquare.h b/src/Specific/solinas64_2e174m17/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e174m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e174m17/freeze.c b/src/Specific/solinas64_2e174m17/freeze.c
new file mode 100644
index 000000000..af0c7eacb
--- /dev/null
+++ b/src/Specific/solinas64_2e174m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' ends the block; as written this text does not compile as C.
+out[0] = uint64_t x6; // NOTE(review): a declaration on the right of '=' is not a C expression, and x6 is given no value here
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 58 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted generator term rather than C; presumably a code-printer gap, confirm with the generator
+out[2] = x2;
+out[3] = 0x3ffffffffffffef;; // note the doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e174m17/freeze.h b/src/Specific/solinas64_2e174m17/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e174m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e174m3/femul.c b/src/Specific/solinas64_2e174m3/femul.c
new file mode 100644
index 000000000..4b25d07dd
--- /dev/null
+++ b/src/Specific/solinas64_2e174m3/femul.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9) // Multiplies the 3-limb operands (x6, x7, x5) and (x10, x11, x9), folds the carries, and stores 3 limbs in out; presumably arithmetic mod 2^174 - 3 per the directory name (TODO confirm).
+{ uint128_t x12 = (((uint128_t)x5 * x10) + (((uint128_t)x7 * x11) + ((uint128_t)x6 * x9))); // column that ends the carry chain (feeds out[0])
+{ uint128_t x13 = ((((uint128_t)x5 * x11) + ((uint128_t)x7 * x9)) + (0x3 * ((uint128_t)x6 * x10))); // middle column; the x6*x10 term is scaled by 3
+{ uint128_t x14 = (((uint128_t)x5 * x9) + (0x3 * (((uint128_t)x7 * x10) + ((uint128_t)x6 * x11)))); // column that starts the carry chain (feeds out[2])
+{ uint128_t x15 = (x14 >> 0x3a); // carry above the low 58 bits, kept at 128-bit width
+{ uint64_t x16 = ((uint64_t)x14 & 0x3ffffffffffffff); // low 58 bits of x14
+{ uint128_t x17 = (x15 + x13);
+{ uint64_t x18 = (uint64_t) (x17 >> 0x3a); // this carry is narrowed to 64 bits
+{ uint64_t x19 = ((uint64_t)x17 & 0x3ffffffffffffff);
+{ uint128_t x20 = (x18 + x12);
+{ uint64_t x21 = (uint64_t) (x20 >> 0x3a); // carry out of the last column
+{ uint64_t x22 = ((uint64_t)x20 & 0x3ffffffffffffff);
+{ uint128_t x23 = (x16 + ((uint128_t)0x3 * x21)); // the last carry is multiplied by 3 (in 128-bit arithmetic) and added back into the first column
+{ uint64_t x24 = (uint64_t) (x23 >> 0x3a);
+{ uint64_t x25 = ((uint64_t)x23 & 0x3ffffffffffffff);
+{ uint64_t x26 = (x24 + x19); // second, shorter carry pass
+{ uint64_t x27 = (x26 >> 0x3a);
+{ uint64_t x28 = (x26 & 0x3ffffffffffffff);
+out[0] = x27 + x22; // no mask is applied after this final addition
+out[1] = x28;
+out[2] = x25;
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e174m3/femul.h b/src/Specific/solinas64_2e174m3/femul.h
new file mode 100644
index 000000000..e0b73959c
--- /dev/null
+++ b/src/Specific/solinas64_2e174m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x6, uint64_t x7, uint64_t x5, uint64_t x10, uint64_t x11, uint64_t x9);
diff --git a/src/Specific/solinas64_2e174m3/fesquare.c b/src/Specific/solinas64_2e174m3/fesquare.c
new file mode 100644
index 000000000..15db1a41d
--- /dev/null
+++ b/src/Specific/solinas64_2e174m3/fesquare.c
@@ -0,0 +1,41 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // Squares the 3-limb operand (x3, x4, x2), folds the carries, and stores 3 limbs in out; presumably arithmetic mod 2^174 - 3 per the directory name (TODO confirm).
+{ uint128_t x5 = (((uint128_t)x2 * x3) + (((uint128_t)x4 * x4) + ((uint128_t)x3 * x2))); // column that ends the carry chain (feeds out[0]); symmetric terms are written out twice, not merged
+{ uint128_t x6 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * ((uint128_t)x3 * x3))); // middle column; the x3*x3 term is scaled by 3
+{ uint128_t x7 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x3) + ((uint128_t)x3 * x4)))); // column that starts the carry chain (feeds out[2])
+{ uint128_t x8 = (x7 >> 0x3a); // carry above the low 58 bits, kept at 128-bit width
+{ uint64_t x9 = ((uint64_t)x7 & 0x3ffffffffffffff); // low 58 bits of x7
+{ uint128_t x10 = (x8 + x6);
+{ uint64_t x11 = (uint64_t) (x10 >> 0x3a); // this carry is narrowed to 64 bits
+{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff);
+{ uint128_t x13 = (x11 + x5);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x3a); // carry out of the last column
+{ uint64_t x15 = ((uint64_t)x13 & 0x3ffffffffffffff);
+{ uint128_t x16 = (x9 + ((uint128_t)0x3 * x14)); // the last carry is multiplied by 3 (in 128-bit arithmetic) and added back into the first column
+{ uint64_t x17 = (uint64_t) (x16 >> 0x3a);
+{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
+{ uint64_t x19 = (x17 + x12); // second, shorter carry pass
+{ uint64_t x20 = (x19 >> 0x3a);
+{ uint64_t x21 = (x19 & 0x3ffffffffffffff);
+out[0] = x20 + x15; // no mask is applied after this final addition
+out[1] = x21;
+out[2] = x18;
+}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[3];
diff --git a/src/Specific/solinas64_2e174m3/fesquare.h b/src/Specific/solinas64_2e174m3/fesquare.h
new file mode 100644
index 000000000..1ed96cada
--- /dev/null
+++ b/src/Specific/solinas64_2e174m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e174m3/freeze.c b/src/Specific/solinas64_2e174m3/freeze.c
new file mode 100644
index 000000000..57ba69349
--- /dev/null
+++ b/src/Specific/solinas64_2e174m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' ends the block; as written this text does not compile as C.
+out[0] = uint64_t x6; // NOTE(review): a declaration on the right of '=' is not a C expression, and x6 is given no value here
+out[1] = uint8_t x7 = Op Syntax.SubWithGetBorrow 58 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted generator term rather than C; presumably a code-printer gap, confirm with the generator
+out[2] = x2;
+out[3] = 0x3fffffffffffffd;; // note the doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e174m3/freeze.h b/src/Specific/solinas64_2e174m3/freeze.h
new file mode 100644
index 000000000..b11039470
--- /dev/null
+++ b/src/Specific/solinas64_2e174m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x3, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e189m25/femul.c b/src/Specific/solinas64_2e189m25/femul.c
new file mode 100644
index 000000000..02c646634
--- /dev/null
+++ b/src/Specific/solinas64_2e189m25/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies the 4-limb operands (x8, x9, x7, x5) and (x14, x15, x13, x11), folds the carries, and stores 4 limbs in out; presumably arithmetic mod 2^189 - 25 per the directory name (TODO confirm).
+{ uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11)))); // column that ends the carry chain (feeds out[0])
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x19 * ((uint128_t)x8 * x14))); // feeds out[1]; the x8*x14 term is scaled by 0x19 (25)
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x19 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); // feeds out[2]
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x19 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13)))))); // column that starts the carry chain (feeds out[3])
+{ uint64_t x20 = (uint64_t) (x19 >> 0x30); // carry above the low 48 bits of x19
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff); // low 48 bits of x19
+{ uint128_t x22 = (x20 + x18);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x2f); // carry above the low 47 bits
+{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff); // low 47 bits
+{ uint128_t x25 = (x23 + x17);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x2f);
+{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x2f); // carry out of the last column
+{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffff);
+{ uint64_t x31 = (x21 + (0x19 * x29)); // the last carry is multiplied by 25 and added back into the first column
+{ uint64_t x32 = (x31 >> 0x30);
+{ uint64_t x33 = (x31 & 0xffffffffffff);
+{ uint64_t x34 = (x32 + x24); // second, shorter carry pass
+{ uint64_t x35 = (x34 >> 0x2f);
+{ uint64_t x36 = (x34 & 0x7fffffffffff);
+out[0] = x30;
+out[1] = x35 + x27; // no mask is applied after this final addition
+out[2] = x36;
+out[3] = x33;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e189m25/femul.h b/src/Specific/solinas64_2e189m25/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e189m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e189m25/fesquare.c b/src/Specific/solinas64_2e189m25/fesquare.c
new file mode 100644
index 000000000..5b1275f6e
--- /dev/null
+++ b/src/Specific/solinas64_2e189m25/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Squares the 4-limb operand (x5, x6, x4, x2), folds the carries, and stores 4 limbs in out; presumably arithmetic mod 2^189 - 25 per the directory name (TODO confirm).
+{ uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2)))); // column that ends the carry chain (feeds out[0]); symmetric terms are written out twice, not merged
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x19 * ((uint128_t)x5 * x5))); // feeds out[1]; the x5*x5 term is scaled by 0x19 (25)
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); // feeds out[2]
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x19 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4)))))); // column that starts the carry chain (feeds out[3])
+{ uint64_t x11 = (uint64_t) (x10 >> 0x30); // carry above the low 48 bits of x10
+{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff); // low 48 bits of x10
+{ uint128_t x13 = (x11 + x9);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x2f); // carry above the low 47 bits
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff); // low 47 bits
+{ uint128_t x16 = (x14 + x8);
+{ uint64_t x17 = (uint64_t) (x16 >> 0x2f);
+{ uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffff);
+{ uint128_t x19 = (x17 + x7);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x2f); // carry out of the last column
+{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
+{ uint64_t x22 = (x12 + (0x19 * x20)); // the last carry is multiplied by 25 and added back into the first column
+{ uint64_t x23 = (x22 >> 0x30);
+{ uint64_t x24 = (x22 & 0xffffffffffff);
+{ uint64_t x25 = (x23 + x15); // second, shorter carry pass
+{ uint64_t x26 = (x25 >> 0x2f);
+{ uint64_t x27 = (x25 & 0x7fffffffffff);
+out[0] = x21;
+out[1] = x26 + x18; // no mask is applied after this final addition
+out[2] = x27;
+out[3] = x24;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e189m25/fesquare.h b/src/Specific/solinas64_2e189m25/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e189m25/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e189m25/freeze.c b/src/Specific/solinas64_2e189m25/freeze.c
new file mode 100644
index 000000000..7c9edcf90
--- /dev/null
+++ b/src/Specific/solinas64_2e189m25/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' ends the block; as written this text does not compile as C.
+out[0] = uint64_t x8; // NOTE(review): a declaration on the right of '=' is not a C expression, and x8 is given no value here
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): looks like an unprinted generator term rather than C; presumably a code-printer gap, confirm with the generator
+out[2] = x2;
+out[3] = 0xffffffffffe7;; // note the doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e189m25/freeze.h b/src/Specific/solinas64_2e189m25/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e189m25/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e190m11/femul.c b/src/Specific/solinas64_2e190m11/femul.c
new file mode 100644
index 000000000..6e06f6a3a
--- /dev/null
+++ b/src/Specific/solinas64_2e190m11/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Multiplies the 4-limb operands (x8, x9, x7, x5) and (x14, x15, x13, x11), folds the carries, and stores 4 limbs in out; presumably arithmetic mod 2^190 - 11 per the directory name (TODO confirm).
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))); // column that ends the carry chain (feeds out[0])
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0xb * (0x2 * ((uint128_t)x8 * x14)))); // feeds out[1]; the x8*x14 term is doubled and scaled by 0xb (11)
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xb * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); // feeds out[2]
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0xb * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13)))))); // column that starts the carry chain (feeds out[3])
+{ uint64_t x20 = (uint64_t) (x19 >> 0x30); // carry above the low 48 bits of x19
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff); // low 48 bits of x19
+{ uint128_t x22 = (x20 + x18);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x2f); // carry above the low 47 bits
+{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff); // low 47 bits
+{ uint128_t x25 = (x23 + x17);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x30); // this column is split at 48 bits, unlike its neighbours
+{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x2f); // carry out of the last column
+{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffff);
+{ uint64_t x31 = (x21 + (0xb * x29)); // the last carry is multiplied by 11 and added back into the first column
+{ uint64_t x32 = (x31 >> 0x30);
+{ uint64_t x33 = (x31 & 0xffffffffffff);
+{ uint64_t x34 = (x32 + x24); // second, shorter carry pass
+{ uint64_t x35 = (x34 >> 0x2f);
+{ uint64_t x36 = (x34 & 0x7fffffffffff);
+out[0] = x30;
+out[1] = x35 + x27; // no mask is applied after this final addition
+out[2] = x36;
+out[3] = x33;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e190m11/femul.h b/src/Specific/solinas64_2e190m11/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e190m11/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e190m11/fesquare.c b/src/Specific/solinas64_2e190m11/fesquare.c
new file mode 100644
index 000000000..b6a920e95
--- /dev/null
+++ b/src/Specific/solinas64_2e190m11/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares a 4-limb value passed most-significant limb first (x5, x6, x4, x2; x2 is limb 0) and writes four limbs to out[0..3], also most-significant first. The carry chain uses 48/47/48/47-bit limbs from limb 0 upward (190 bits), and the carry out of the top limb re-enters limb 0 multiplied by 0xb, i.e. reduction modulo 2^190 - 11, matching the directory name.
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // limb-3 column of the product
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xb * (0x2 * ((uint128_t)x5 * x5)))); // limb-2 column; 0x2 compensates for the uneven limb widths, 0xb folds in the term that wrapped past limb 3
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xb * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); // limb-1 column
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0xb * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4)))))); // limb-0 column
+{ uint64_t x11 = (uint64_t) (x10 >> 0x30); // carry out of limb 0 (48 bits)
+{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff);
+{ uint128_t x13 = (x11 + x9);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x2f); // carry out of limb 1 (47 bits)
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff);
+{ uint128_t x16 = (x14 + x8);
+{ uint64_t x17 = (uint64_t) (x16 >> 0x30); // carry out of limb 2 (48 bits)
+{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffffff);
+{ uint128_t x19 = (x17 + x7);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x2f); // carry out of the top limb (47 bits)
+{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
+{ uint64_t x22 = (x12 + (0xb * x20)); // wrap the top carry back into limb 0
+{ uint64_t x23 = (x22 >> 0x30);
+{ uint64_t x24 = (x22 & 0xffffffffffff);
+{ uint64_t x25 = (x23 + x15);
+{ uint64_t x26 = (x25 >> 0x2f);
+{ uint64_t x27 = (x25 & 0x7fffffffffff);
+out[0] = x21; // limb 3
+out[1] = x26 + x18; // limb 2 plus the final carry out of limb 1 (added without re-masking)
+out[2] = x27; // limb 1
+out[3] = x24; // limb 0
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e190m11/fesquare.h b/src/Specific/solinas64_2e190m11/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e190m11/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare(out, x5, x6, x4, x2): the four scalars are the operand's limbs, most-significant first (x2 is limb 0), as used by the definition in fesquare.c; the result limbs are written to out.
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e190m11/freeze.c b/src/Specific/solinas64_2e190m11/freeze.c
new file mode 100644
index 000000000..9dffcedef
--- /dev/null
+++ b/src/Specific/solinas64_2e190m11/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): this definition is not valid C as emitted: it has no opening brace, out[0] and out[1] are assigned declarations, and the out[1] right-hand side is an untranslated "Op Syntax.SubWithGetBorrow ..." term. Presumably the code generator could not print this function; regenerate rather than hand-edit (TODO confirm).
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint64_t x8;
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xfffffffffff5;; // 0xfffffffffff5 == 2^48 - 11
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e190m11/freeze.h b/src/Specific/solinas64_2e190m11/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e190m11/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// freeze(out, x5, x6, x4, x2): prototype only. NOTE(review): the freeze.c body added alongside this header is not valid C, so the limb order and output layout cannot be confirmed from it.
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e191m19/femul.c b/src/Specific/solinas64_2e191m19/femul.c
new file mode 100644
index 000000000..1283c97e0
--- /dev/null
+++ b/src/Specific/solinas64_2e191m19/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: multiplies two 5-limb values, each passed most-significant limb first ((x10, x11, x9, x7, x5) and (x18, x19, x17, x15, x13); x5 and x13 are limb 0), and writes five limbs to out[0..4], also most-significant first. The carry chain uses a 39-bit limb 0 and 38-bit limbs 1-4 (191 bits), and the carry out of the top limb re-enters limb 0 multiplied by 0x13, i.e. reduction modulo 2^191 - 19, matching the directory name.
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13)
+{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13))))); // limb-4 column; 0x2 compensates for the uneven limb widths
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18))); // limb-3 column; 0x13 folds in the term that wrapped past limb 4
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)))); // limb-2 column
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))))); // limb-1 column
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15))))))); // limb-0 column
+{ uint64_t x25 = (uint64_t) (x24 >> 0x27); // carry out of limb 0 (39 bits)
+{ uint64_t x26 = ((uint64_t)x24 & 0x7fffffffff);
+{ uint128_t x27 = (x25 + x23);
+{ uint64_t x28 = (uint64_t) (x27 >> 0x26); // carry out of limb 1 (38 bits)
+{ uint64_t x29 = ((uint64_t)x27 & 0x3fffffffff);
+{ uint128_t x30 = (x28 + x22);
+{ uint64_t x31 = (uint64_t) (x30 >> 0x26);
+{ uint64_t x32 = ((uint64_t)x30 & 0x3fffffffff);
+{ uint128_t x33 = (x31 + x21);
+{ uint64_t x34 = (uint64_t) (x33 >> 0x26);
+{ uint64_t x35 = ((uint64_t)x33 & 0x3fffffffff);
+{ uint128_t x36 = (x34 + x20);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x26); // carry out of the top limb
+{ uint64_t x38 = ((uint64_t)x36 & 0x3fffffffff);
+{ uint64_t x39 = (x26 + (0x13 * x37)); // wrap the top carry back into limb 0
+{ uint64_t x40 = (x39 >> 0x27);
+{ uint64_t x41 = (x39 & 0x7fffffffff);
+{ uint64_t x42 = (x40 + x29);
+{ uint64_t x43 = (x42 >> 0x26);
+{ uint64_t x44 = (x42 & 0x3fffffffff);
+out[0] = x38; // limb 4
+out[1] = x35; // limb 3
+out[2] = x43 + x32; // limb 2 plus the final carry out of limb 1 (added without re-masking)
+out[3] = x44; // limb 1
+out[4] = x41; // limb 0
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e191m19/femul.h b/src/Specific/solinas64_2e191m19/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e191m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul(out, ...): (x10, x11, x9, x7, x5) and (x18, x19, x17, x15, x13) are the limbs of the two factors, each listed most-significant first, as used by the definition in femul.c; the product's limbs are written to out.
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/solinas64_2e191m19/fesquare.c b/src/Specific/solinas64_2e191m19/fesquare.c
new file mode 100644
index 000000000..f212bd63c
--- /dev/null
+++ b/src/Specific/solinas64_2e191m19/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares a 5-limb value passed most-significant limb first (x7, x8, x6, x4, x2; x2 is limb 0) and writes five limbs to out[0..4], also most-significant first. The carry chain uses a 39-bit limb 0 and 38-bit limbs 1-4 (191 bits), and the carry out of the top limb re-enters limb 0 multiplied by 0x13, i.e. reduction modulo 2^191 - 19, matching the directory name.
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2))))); // limb-4 column; 0x2 compensates for the uneven limb widths
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7))); // limb-3 column; 0x13 folds in the term that wrapped past limb 4
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)))); // limb-2 column
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))))); // limb-1 column
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4))))))); // limb-0 column
+{ uint64_t x14 = (uint64_t) (x13 >> 0x27); // carry out of limb 0 (39 bits)
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffff);
+{ uint128_t x16 = (x14 + x12);
+{ uint64_t x17 = (uint64_t) (x16 >> 0x26); // carry out of limb 1 (38 bits)
+{ uint64_t x18 = ((uint64_t)x16 & 0x3fffffffff);
+{ uint128_t x19 = (x17 + x11);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x26);
+{ uint64_t x21 = ((uint64_t)x19 & 0x3fffffffff);
+{ uint128_t x22 = (x20 + x10);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x26);
+{ uint64_t x24 = ((uint64_t)x22 & 0x3fffffffff);
+{ uint128_t x25 = (x23 + x9);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x26); // carry out of the top limb
+{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffff);
+{ uint64_t x28 = (x15 + (0x13 * x26)); // wrap the top carry back into limb 0
+{ uint64_t x29 = (x28 >> 0x27);
+{ uint64_t x30 = (x28 & 0x7fffffffff);
+{ uint64_t x31 = (x29 + x18);
+{ uint64_t x32 = (x31 >> 0x26);
+{ uint64_t x33 = (x31 & 0x3fffffffff);
+out[0] = x27; // limb 4
+out[1] = x24; // limb 3
+out[2] = x32 + x21; // limb 2 plus the final carry out of limb 1 (added without re-masking)
+out[3] = x33; // limb 1
+out[4] = x30; // limb 0
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e191m19/fesquare.h b/src/Specific/solinas64_2e191m19/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e191m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare(out, x7, x8, x6, x4, x2): the five scalars are the operand's limbs, most-significant first (x2 is limb 0), as used by the definition in fesquare.c; the result limbs are written to out.
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e191m19/freeze.c b/src/Specific/solinas64_2e191m19/freeze.c
new file mode 100644
index 000000000..f34ff816e
--- /dev/null
+++ b/src/Specific/solinas64_2e191m19/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): this definition is not valid C as emitted: it has no opening brace, out[0] and out[1] are assigned declarations, and the out[1] right-hand side is an untranslated "Op Syntax.SubWithGetBorrow ..." term. It also takes five limbs but writes only out[0..3] and notes out[4] for the caller, unlike femul/fesquare in this directory (out[5]). Presumably a code-generator printing failure; regenerate rather than hand-edit (TODO confirm).
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint64_t x10;
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 39 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x7fffffffed;; // 0x7fffffffed == 2^39 - 19
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e191m19/freeze.h b/src/Specific/solinas64_2e191m19/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e191m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// freeze(out, x7, x8, x6, x4, x2): prototype only. NOTE(review): the freeze.c body added alongside this header is not valid C, so the limb order and output layout cannot be confirmed from it.
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e192m2e64m1/femul.c b/src/Specific/solinas64_2e192m2e64m1/femul.c
new file mode 100644
index 000000000..1ba8252bf
--- /dev/null
+++ b/src/Specific/solinas64_2e192m2e64m1/femul.c
@@ -0,0 +1,57 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: multiplies two 4-limb values, each passed most-significant limb first ((x8, x9, x7, x5) and (x14, x15, x13, x11); x5 and x11 are limb 0), and writes four limbs to out[0..3], also most-significant first. Limbs are 48 bits wide (0x30 shifts); overflow past the top limb is added to limb 0 and, scaled by 0x10000, to limb 1, i.e. 2^192 is replaced by 2^64 + 1, matching the directory name solinas64_2e192m2e64m1.
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
+{ uint128_t x16 = ((((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))) + (0x10000 * ((uint128_t)x8 * x14))); // limb-3 column, including the wrapped x8*x14 term scaled by 0x10000
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (((uint128_t)x8 * x14) + (0x10000 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))))); // limb-2 column
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + ((((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)) + (0x10000 * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))))); // limb-1 column
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))); // limb-0 column
+{ uint64_t x20 = (uint64_t) (x19 >> 0x30); // carry out of limb 0
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff);
+{ uint128_t x22 = (x16 >> 0x30);
+{ uint64_t x23 = ((uint64_t)x16 & 0xffffffffffff);
+{ uint128_t x24 = ((0x1000000000000 * x22) + x23); // high and low parts of x16 recombined
+{ uint128_t x25 = (x24 >> 0x30); // overflow of the limb-3 column past 48 bits
+{ uint64_t x26 = ((uint64_t)x24 & 0xffffffffffff);
+{ uint128_t x27 = ((x20 + x18) + (0x10000 * x25)); // limb 1: column + carry from limb 0 + overflow scaled by 0x10000
+{ uint128_t x28 = (x27 >> 0x30);
+{ uint64_t x29 = ((uint64_t)x27 & 0xffffffffffff);
+{ uint128_t x30 = (x21 + x25); // limb 0 plus the overflow
+{ uint64_t x31 = (uint64_t) (x30 >> 0x30);
+{ uint64_t x32 = ((uint64_t)x30 & 0xffffffffffff);
+{ uint128_t x33 = (x28 + x17);
+{ uint128_t x34 = (x33 >> 0x30);
+{ uint64_t x35 = ((uint64_t)x33 & 0xffffffffffff);
+{ uint128_t x36 = (x34 + x26);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x30);
+{ uint64_t x38 = ((uint64_t)x36 & 0xffffffffffff);
+{ uint128_t x39 = (((uint128_t)0x1000000000000 * x37) + x38);
+{ uint64_t x40 = (uint64_t) (x39 >> 0x30); // second overflow past the top limb
+{ uint64_t x41 = ((uint64_t)x39 & 0xffffffffffff);
+{ uint64_t x42 = ((x31 + x29) + (0x10000 * x40)); // fold the second overflow into limb 1 ...
+{ uint64_t x43 = (x42 >> 0x30);
+{ uint64_t x44 = (x42 & 0xffffffffffff);
+{ uint64_t x45 = (x32 + x40); // ... and into limb 0
+{ uint64_t x46 = (x45 >> 0x30);
+{ uint64_t x47 = (x45 & 0xffffffffffff);
+out[0] = x41; // limb 3
+out[1] = x43 + x35; // limb 2 plus the carry out of limb 1 (added without re-masking)
+out[2] = x46 + x44; // limb 1 plus the carry out of limb 0 (added without re-masking)
+out[3] = x47; // limb 0
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e192m2e64m1/femul.h b/src/Specific/solinas64_2e192m2e64m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e192m2e64m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul(out, ...): (x8, x9, x7, x5) and (x14, x15, x13, x11) are the limbs of the two factors, each listed most-significant first, as used by the definition in femul.c; the product's limbs are written to out.
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e192m2e64m1/fesquare.c b/src/Specific/solinas64_2e192m2e64m1/fesquare.c
new file mode 100644
index 000000000..66b85d83e
--- /dev/null
+++ b/src/Specific/solinas64_2e192m2e64m1/fesquare.c
@@ -0,0 +1,57 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares a 4-limb value passed most-significant limb first (x5, x6, x4, x2; x2 is limb 0) and writes four limbs to out[0..3], also most-significant first. Limbs are 48 bits wide (0x30 shifts); overflow past the top limb is added to limb 0 and, scaled by 0x10000, to limb 1, i.e. 2^192 is replaced by 2^64 + 1, matching the directory name solinas64_2e192m2e64m1.
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint128_t x7 = ((((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))) + (0x10000 * ((uint128_t)x5 * x5))); // limb-3 column, including the wrapped x5*x5 term scaled by 0x10000
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x5 * x5) + (0x10000 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))))); // limb-2 column
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)) + (0x10000 * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))))); // limb-1 column
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))); // limb-0 column
+{ uint64_t x11 = (uint64_t) (x10 >> 0x30); // carry out of limb 0
+{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffff);
+{ uint128_t x13 = (x7 >> 0x30);
+{ uint64_t x14 = ((uint64_t)x7 & 0xffffffffffff);
+{ uint128_t x15 = ((0x1000000000000 * x13) + x14); // high and low parts of x7 recombined
+{ uint128_t x16 = (x15 >> 0x30); // overflow of the limb-3 column past 48 bits
+{ uint64_t x17 = ((uint64_t)x15 & 0xffffffffffff);
+{ uint128_t x18 = ((x11 + x9) + (0x10000 * x16)); // limb 1: column + carry from limb 0 + overflow scaled by 0x10000
+{ uint128_t x19 = (x18 >> 0x30);
+{ uint64_t x20 = ((uint64_t)x18 & 0xffffffffffff);
+{ uint128_t x21 = (x12 + x16); // limb 0 plus the overflow
+{ uint64_t x22 = (uint64_t) (x21 >> 0x30);
+{ uint64_t x23 = ((uint64_t)x21 & 0xffffffffffff);
+{ uint128_t x24 = (x19 + x8);
+{ uint128_t x25 = (x24 >> 0x30);
+{ uint64_t x26 = ((uint64_t)x24 & 0xffffffffffff);
+{ uint128_t x27 = (x25 + x17);
+{ uint64_t x28 = (uint64_t) (x27 >> 0x30);
+{ uint64_t x29 = ((uint64_t)x27 & 0xffffffffffff);
+{ uint128_t x30 = (((uint128_t)0x1000000000000 * x28) + x29);
+{ uint64_t x31 = (uint64_t) (x30 >> 0x30); // second overflow past the top limb
+{ uint64_t x32 = ((uint64_t)x30 & 0xffffffffffff);
+{ uint64_t x33 = ((x22 + x20) + (0x10000 * x31)); // fold the second overflow into limb 1 ...
+{ uint64_t x34 = (x33 >> 0x30);
+{ uint64_t x35 = (x33 & 0xffffffffffff);
+{ uint64_t x36 = (x23 + x31); // ... and into limb 0
+{ uint64_t x37 = (x36 >> 0x30);
+{ uint64_t x38 = (x36 & 0xffffffffffff);
+out[0] = x32; // limb 3
+out[1] = x34 + x26; // limb 2 plus the carry out of limb 1 (added without re-masking)
+out[2] = x37 + x35; // limb 1 plus the carry out of limb 0 (added without re-masking)
+out[3] = x38; // limb 0
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e192m2e64m1/fesquare.h b/src/Specific/solinas64_2e192m2e64m1/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e192m2e64m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare(out, x5, x6, x4, x2): the four scalars are the operand's limbs, most-significant first (x2 is limb 0), as used by the definition in fesquare.c; the result limbs are written to out.
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e192m2e64m1/freeze.c b/src/Specific/solinas64_2e192m2e64m1/freeze.c
new file mode 100644
index 000000000..8e3be6645
--- /dev/null
+++ b/src/Specific/solinas64_2e192m2e64m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): this definition is not valid C as emitted: it has no opening brace, out[0] and out[1] are assigned declarations, and the out[1] right-hand side is an untranslated "Op Syntax.SubWithGetBorrow ..." term. Presumably the code generator could not print this function; regenerate rather than hand-edit (TODO confirm).
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint64_t x8;
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffffffffff;; // 0xffffffffffff == 2^48 - 1
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e192m2e64m1/freeze.h b/src/Specific/solinas64_2e192m2e64m1/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e192m2e64m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// freeze(out, x5, x6, x4, x2): prototype only. NOTE(review): the freeze.c body added alongside this header is not valid C, so the limb order and output layout cannot be confirmed from it.
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e194m33/femul.c b/src/Specific/solinas64_2e194m33/femul.c
new file mode 100644
index 000000000..0a72aa067
--- /dev/null
+++ b/src/Specific/solinas64_2e194m33/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: multiplies two 4-limb values, each passed most-significant limb first ((x8, x9, x7, x5) and (x14, x15, x13, x11); x5 and x11 are limb 0), and writes four limbs to out[0..3], also most-significant first. The carry chain uses 49/48/49/48-bit limbs from limb 0 upward (194 bits), and the carry out of the top limb re-enters limb 0 multiplied by 0x21, i.e. reduction modulo 2^194 - 33, matching the directory name.
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))); // limb-3 column of the product
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x21 * (0x2 * ((uint128_t)x8 * x14)))); // limb-2 column; 0x2 compensates for the uneven limb widths, 0x21 folds in the term that wrapped past limb 3
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x21 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); // limb-1 column
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x21 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13)))))); // limb-0 column
+{ uint64_t x20 = (uint64_t) (x19 >> 0x31); // carry out of limb 0 (49 bits)
+{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
+{ uint128_t x22 = (x20 + x18);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x30); // carry out of limb 1 (48 bits)
+{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffff);
+{ uint128_t x25 = (x23 + x17);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x31); // carry out of limb 2 (49 bits)
+{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x30); // carry out of the top limb (48 bits)
+{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffff);
+{ uint64_t x31 = (x21 + (0x21 * x29)); // wrap the top carry back into limb 0
+{ uint64_t x32 = (x31 >> 0x31);
+{ uint64_t x33 = (x31 & 0x1ffffffffffff);
+{ uint64_t x34 = (x32 + x24);
+{ uint64_t x35 = (x34 >> 0x30);
+{ uint64_t x36 = (x34 & 0xffffffffffff);
+out[0] = x30; // limb 3
+out[1] = x35 + x27; // limb 2 plus the final carry out of limb 1 (added without re-masking)
+out[2] = x36; // limb 1
+out[3] = x33; // limb 0
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e194m33/femul.h b/src/Specific/solinas64_2e194m33/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e194m33/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul(out, ...): (x8, x9, x7, x5) and (x14, x15, x13, x11) are the limbs of the two factors, each listed most-significant first, as used by the definition in femul.c; the product's limbs are written to out.
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e194m33/fesquare.c b/src/Specific/solinas64_2e194m33/fesquare.c
new file mode 100644
index 000000000..b2d94772d
--- /dev/null
+++ b/src/Specific/solinas64_2e194m33/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares a 4-limb value passed most-significant limb first (x5, x6, x4, x2; x2 is limb 0) and writes four limbs to out[0..3], also most-significant first. The carry chain uses 49/48/49/48-bit limbs from limb 0 upward (194 bits), and the carry out of the top limb re-enters limb 0 multiplied by 0x21, i.e. reduction modulo 2^194 - 33, matching the directory name.
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // limb-3 column of the product
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x21 * (0x2 * ((uint128_t)x5 * x5)))); // limb-2 column; 0x2 compensates for the uneven limb widths, 0x21 folds in the term that wrapped past limb 3
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x21 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); // limb-1 column
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x21 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4)))))); // limb-0 column
+{ uint64_t x11 = (uint64_t) (x10 >> 0x31); // carry out of limb 0 (49 bits)
+{ uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffff);
+{ uint128_t x13 = (x11 + x9);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x30); // carry out of limb 1 (48 bits)
+{ uint64_t x15 = ((uint64_t)x13 & 0xffffffffffff);
+{ uint128_t x16 = (x14 + x8);
+{ uint64_t x17 = (uint64_t) (x16 >> 0x31); // carry out of limb 2 (49 bits)
+{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffff);
+{ uint128_t x19 = (x17 + x7);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x30); // carry out of the top limb (48 bits)
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffff);
+{ uint64_t x22 = (x12 + (0x21 * x20)); // wrap the top carry back into limb 0
+{ uint64_t x23 = (x22 >> 0x31);
+{ uint64_t x24 = (x22 & 0x1ffffffffffff);
+{ uint64_t x25 = (x23 + x15);
+{ uint64_t x26 = (x25 >> 0x30);
+{ uint64_t x27 = (x25 & 0xffffffffffff);
+out[0] = x21; // limb 3
+out[1] = x26 + x18; // limb 2 plus the final carry out of limb 1 (added without re-masking)
+out[2] = x27; // limb 1
+out[3] = x24; // limb 0
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e194m33/fesquare.h b/src/Specific/solinas64_2e194m33/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e194m33/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare(out, x5, x6, x4, x2): the four scalars are the operand's limbs, most-significant first (x2 is limb 0), as used by the definition in fesquare.c; the result limbs are written to out.
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e194m33/freeze.c b/src/Specific/solinas64_2e194m33/freeze.c
new file mode 100644
index 000000000..f11db2654
--- /dev/null
+++ b/src/Specific/solinas64_2e194m33/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// NOTE(review): this definition is not valid C as emitted: it has no opening brace, out[0] and out[1] are assigned declarations, and the out[1] right-hand side is an untranslated "Op Syntax.SubWithGetBorrow ..." term. Presumably the code generator could not print this function; regenerate rather than hand-edit (TODO confirm).
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint64_t x8;
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 49 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0x1ffffffffffdf;; // 0x1ffffffffffdf == 2^49 - 33
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e194m33/freeze.h b/src/Specific/solinas64_2e194m33/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e194m33/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// freeze(out, x5, x6, x4, x2): prototype only. NOTE(review): the freeze.c body added alongside this header is not valid C, so the limb order and output layout cannot be confirmed from it.
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e196m15/femul.c b/src/Specific/solinas64_2e196m15/femul.c
new file mode 100644
index 000000000..311023c9f
--- /dev/null
+++ b/src/Specific/solinas64_2e196m15/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul: multiplies two 4-limb values, each passed most-significant limb first ((x8, x9, x7, x5) and (x14, x15, x13, x11); x5 and x11 are limb 0), and writes four limbs to out[0..3], also most-significant first. All limbs are 49 bits wide in the carry chain (196 bits), and the carry out of the top limb re-enters limb 0 multiplied by 0xf, i.e. reduction modulo 2^196 - 15, matching the directory name.
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11)))); // limb-3 column of the product
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0xf * ((uint128_t)x8 * x14))); // limb-2 column; 0xf folds in the term that wrapped past limb 3
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0xf * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)))); // limb-1 column
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0xf * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13))))); // limb-0 column
+{ uint64_t x20 = (uint64_t) (x19 >> 0x31); // carry out of limb 0 (49 bits)
+{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
+{ uint128_t x22 = (x20 + x18);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x31);
+{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffff);
+{ uint128_t x25 = (x23 + x17);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x31);
+{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x31); // carry out of the top limb
+{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffff);
+{ uint64_t x31 = (x21 + (0xf * x29)); // wrap the top carry back into limb 0
+{ uint64_t x32 = (x31 >> 0x31);
+{ uint64_t x33 = (x31 & 0x1ffffffffffff);
+{ uint64_t x34 = (x32 + x24);
+{ uint64_t x35 = (x34 >> 0x31);
+{ uint64_t x36 = (x34 & 0x1ffffffffffff);
+out[0] = x30; // limb 3
+out[1] = x35 + x27; // limb 2 plus the final carry out of limb 1 (added without re-masking)
+out[2] = x36; // limb 1
+out[3] = x33; // limb 0
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e196m15/femul.h b/src/Specific/solinas64_2e196m15/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e196m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// femul(out, ...): (x8, x9, x7, x5) and (x14, x15, x13, x11) are the limbs of the two factors, each listed most-significant first, as used by the definition in femul.c; the product's limbs are written to out.
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e196m15/fesquare.c b/src/Specific/solinas64_2e196m15/fesquare.c
new file mode 100644
index 000000000..f71b0d879
--- /dev/null
+++ b/src/Specific/solinas64_2e196m15/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+// uint128_t: unsigned integer typedef using GCC's mode(TI) attribute, i.e. a 128-bit (TImode) integer.
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+// GCC only (not Clang or ICC): alias the _subborrow_u32/_subborrow_u64 names to the __builtin_ia32_sbb_* builtins; the bug report linked below gives the background.
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare: squares a 4-limb value passed most-significant limb first (x5, x6, x4, x2; x2 is limb 0) and writes four limbs to out[0..3], also most-significant first. All limbs are 49 bits wide in the carry chain (196 bits), and the carry out of the top limb re-enters limb 0 multiplied by 0xf, i.e. reduction modulo 2^196 - 15, matching the directory name.
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // limb-3 column of the product
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0xf * ((uint128_t)x5 * x5))); // limb-2 column; 0xf folds in the term that wrapped past limb 3
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6)))); // limb-1 column
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0xf * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4))))); // limb-0 column
+{ uint64_t x11 = (uint64_t) (x10 >> 0x31); // carry out of limb 0 (49 bits)
+{ uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffff);
+{ uint128_t x13 = (x11 + x9);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x31);
+{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffff);
+{ uint128_t x16 = (x14 + x8);
+{ uint64_t x17 = (uint64_t) (x16 >> 0x31);
+{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffff);
+{ uint128_t x19 = (x17 + x7);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x31); // carry out of the top limb
+{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffff);
+{ uint64_t x22 = (x12 + (0xf * x20)); // wrap the top carry back into limb 0
+{ uint64_t x23 = (x22 >> 0x31);
+{ uint64_t x24 = (x22 & 0x1ffffffffffff);
+{ uint64_t x25 = (x23 + x15);
+{ uint64_t x26 = (x25 >> 0x31);
+{ uint64_t x27 = (x25 & 0x1ffffffffffff);
+out[0] = x21; // limb 3
+out[1] = x26 + x18; // limb 2 plus the final carry out of limb 1 (added without re-masking)
+out[2] = x27; // limb 1
+out[3] = x24; // limb 0
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e196m15/fesquare.h b/src/Specific/solinas64_2e196m15/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e196m15/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+// Drop any earlier force_inline definition and make it expand to the always_inline attribute.
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+// fesquare(out, x5, x6, x4, x2): the four scalars are the operand's limbs, most-significant first (x2 is limb 0), as used by the definition in fesquare.c; the result limbs are written to out.
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e196m15/freeze.c b/src/Specific/solinas64_2e196m15/freeze.c
new file mode 100644
index 000000000..c9b2e9e7d
--- /dev/null
+++ b/src/Specific/solinas64_2e196m15/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * freeze: reduces the 4-limb value (x2 least significant, then x4, x6, x5;
+ * 49 bits per limb) to its canonical representative modulo 2^196 - 15 and
+ * stores it in out[0..3], with out[3] the least significant limb.
+ *
+ * The body previously emitted here was not C: it had no opening brace and
+ * contained the unlowered term "Op Syntax.SubWithGetBorrow 49 ...". This is a
+ * hand-written, branch-free replacement: subtract the modulus with a borrow
+ * chain, then add it back under a mask when the subtraction went negative.
+ *
+ * NOTE(review): assumes every input limb is below 2^49 (fully carried); the
+ * limb order and widths follow fesquare.c in this directory. This code did
+ * not come from the generator -- confirm, and regenerate when possible.
+ */
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  const uint64_t mask = 0x1ffffffffffff; /* 2^49 - 1: limb mask, also modulus limbs 1..3 */
+  const uint64_t p0 = 0x1fffffffffff1;   /* 2^49 - 15: modulus limb 0 */
+
+  /* x - p limb by limb; bit 63 of each wrapped difference is the borrow. */
+  uint64_t d0 = x2 - p0;
+  uint64_t d1 = x4 - mask - (d0 >> 63);
+  uint64_t d2 = x6 - mask - (d1 >> 63);
+  uint64_t d3 = x5 - mask - (d2 >> 63);
+
+  /* All ones when the final borrow is set (x < p), zero otherwise. */
+  uint64_t addback = (uint64_t)0 - (d3 >> 63);
+
+  /* Conditionally add p back, propagating carries between limbs. */
+  uint64_t r0 = (d0 & mask) + (p0 & addback);
+  uint64_t r1 = (d1 & mask) + (mask & addback) + (r0 >> 49);
+  uint64_t r2 = (d2 & mask) + (mask & addback) + (r1 >> 49);
+  uint64_t r3 = (d3 & mask) + (mask & addback) + (r2 >> 49);
+
+  out[0] = r3 & mask;
+  out[1] = r2 & mask;
+  out[2] = r1 & mask;
+  out[3] = r0 & mask;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e196m15/freeze.h b/src/Specific/solinas64_2e196m15/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e196m15/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e198m17/femul.c b/src/Specific/solinas64_2e198m17/femul.c
new file mode 100644
index 000000000..aae9230df
--- /dev/null
+++ b/src/Specific/solinas64_2e198m17/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * femul: forms the 128-bit column sums of the limb vector (x5, x7, x9, x8)
+ * times (x11, x13, x15, x14), runs the carry chain, and writes the four
+ * resulting limbs to out[0..3]. out[3] originates from the x5*x11 column,
+ * out[0] from the last column of the chain.
+ */
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
+{
+  /* Column sums; x19 feeds the start of the carry chain, x16 its end. */
+  uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+  uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x11 * (0x2 * ((uint128_t)x8 * x14))));
+  uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x11 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+  uint128_t x19 = (((uint128_t)x5 * x11) + (0x11 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+
+  /* Carry chain with alternating 50- and 49-bit limbs. */
+  uint64_t x20 = (uint64_t)(x19 >> 50);
+  uint64_t x21 = (uint64_t)x19 & 0x3ffffffffffff;
+  uint128_t x22 = x20 + x18;
+  uint64_t x23 = (uint64_t)(x22 >> 49);
+  uint64_t x24 = (uint64_t)x22 & 0x1ffffffffffff;
+  uint128_t x25 = x23 + x17;
+  uint64_t x26 = (uint64_t)(x25 >> 50);
+  uint64_t x27 = (uint64_t)x25 & 0x3ffffffffffff;
+  uint128_t x28 = x26 + x16;
+  uint64_t x29 = (uint64_t)(x28 >> 49);
+  uint64_t x30 = (uint64_t)x28 & 0x1ffffffffffff;
+
+  /* The carry out of the last column re-enters the first limb scaled by 0x11. */
+  uint64_t x31 = x21 + (0x11 * x29);
+  uint64_t x32 = x31 >> 50;
+  uint64_t x33 = x31 & 0x3ffffffffffff;
+  uint64_t x34 = x32 + x24;
+  uint64_t x35 = x34 >> 49;
+  uint64_t x36 = x34 & 0x1ffffffffffff;
+
+  out[0] = x30;
+  out[1] = x35 + x27;
+  out[2] = x36;
+  out[3] = x33;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e198m17/femul.h b/src/Specific/solinas64_2e198m17/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e198m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e198m17/fesquare.c b/src/Specific/solinas64_2e198m17/fesquare.c
new file mode 100644
index 000000000..626385e88
--- /dev/null
+++ b/src/Specific/solinas64_2e198m17/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * fesquare: forms the 128-bit column sums of the limb vector (x2, x4, x6, x5)
+ * multiplied by itself, runs the carry chain, and writes the four resulting
+ * limbs to out[0..3]. out[3] originates from the x2*x2 column, out[0] from
+ * the last column of the chain.
+ */
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  /* Column sums; x10 feeds the start of the carry chain, x7 its end. */
+  uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+  uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * (0x2 * ((uint128_t)x5 * x5))));
+  uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+  uint128_t x10 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+
+  /* Carry chain with alternating 50- and 49-bit limbs. */
+  uint64_t x11 = (uint64_t)(x10 >> 50);
+  uint64_t x12 = (uint64_t)x10 & 0x3ffffffffffff;
+  uint128_t x13 = x11 + x9;
+  uint64_t x14 = (uint64_t)(x13 >> 49);
+  uint64_t x15 = (uint64_t)x13 & 0x1ffffffffffff;
+  uint128_t x16 = x14 + x8;
+  uint64_t x17 = (uint64_t)(x16 >> 50);
+  uint64_t x18 = (uint64_t)x16 & 0x3ffffffffffff;
+  uint128_t x19 = x17 + x7;
+  uint64_t x20 = (uint64_t)(x19 >> 49);
+  uint64_t x21 = (uint64_t)x19 & 0x1ffffffffffff;
+
+  /* The carry out of the last column re-enters the first limb scaled by 0x11. */
+  uint64_t x22 = x12 + (0x11 * x20);
+  uint64_t x23 = x22 >> 50;
+  uint64_t x24 = x22 & 0x3ffffffffffff;
+  uint64_t x25 = x23 + x15;
+  uint64_t x26 = x25 >> 49;
+  uint64_t x27 = x25 & 0x1ffffffffffff;
+
+  out[0] = x21;
+  out[1] = x26 + x18;
+  out[2] = x27;
+  out[3] = x24;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e198m17/fesquare.h b/src/Specific/solinas64_2e198m17/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e198m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e198m17/freeze.c b/src/Specific/solinas64_2e198m17/freeze.c
new file mode 100644
index 000000000..6f03648b3
--- /dev/null
+++ b/src/Specific/solinas64_2e198m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * freeze: reduces the 4-limb value (x2 least significant, then x4, x6, x5;
+ * limb widths 50, 49, 50, 49 bits) to its canonical representative modulo
+ * 2^198 - 17 and stores it in out[0..3], with out[3] the least significant
+ * limb.
+ *
+ * The body previously emitted here was not C: it had no opening brace and
+ * contained the unlowered term "Op Syntax.SubWithGetBorrow 50 ...". This is a
+ * hand-written, branch-free replacement: subtract the modulus with a borrow
+ * chain, then add it back under a mask when the subtraction went negative.
+ *
+ * NOTE(review): assumes every input limb is below 2^width (fully carried);
+ * the limb order and widths follow femul.c in this directory. This code did
+ * not come from the generator -- confirm, and regenerate when possible.
+ */
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  const uint64_t mask50 = 0x3ffffffffffff; /* 2^50 - 1: limbs 0 and 2 */
+  const uint64_t mask49 = 0x1ffffffffffff; /* 2^49 - 1: limbs 1 and 3 */
+  const uint64_t p0 = 0x3ffffffffffef;     /* 2^50 - 17: modulus limb 0 */
+
+  /* x - p limb by limb; bit 63 of each wrapped difference is the borrow. */
+  uint64_t d0 = x2 - p0;
+  uint64_t d1 = x4 - mask49 - (d0 >> 63);
+  uint64_t d2 = x6 - mask50 - (d1 >> 63);
+  uint64_t d3 = x5 - mask49 - (d2 >> 63);
+
+  /* All ones when the final borrow is set (x < p), zero otherwise. */
+  uint64_t addback = (uint64_t)0 - (d3 >> 63);
+
+  /* Conditionally add p back, propagating carries between limbs. */
+  uint64_t r0 = (d0 & mask50) + (p0 & addback);
+  uint64_t r1 = (d1 & mask49) + (mask49 & addback) + (r0 >> 50);
+  uint64_t r2 = (d2 & mask50) + (mask50 & addback) + (r1 >> 49);
+  uint64_t r3 = (d3 & mask49) + (mask49 & addback) + (r2 >> 50);
+
+  out[0] = r3 & mask49;
+  out[1] = r2 & mask50;
+  out[2] = r1 & mask49;
+  out[3] = r0 & mask50;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e198m17/freeze.h b/src/Specific/solinas64_2e198m17/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e198m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e205m45x2e198m1/freeze.c b/src/Specific/solinas64_2e205m45x2e198m1/freeze.c
new file mode 100644
index 000000000..0fb0d73ea
--- /dev/null
+++ b/src/Specific/solinas64_2e205m45x2e198m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature, so the definition does not parse as C
+out[0] = uint64_t x8; // NOTE(review): a declaration used as an expression; x8 is never computed anywhere in this body
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): looks like generator-internal syntax for a 52-bit subtract-with-borrow that was never lowered to C -- TODO regenerate
+out[2] = x2; // stores input x2 unchanged; presumably an operand of the borrow op above rather than a real output -- confirm
+out[3] = 0xfffffffffffff;; // constant 2^52 - 1; presumably the other operand of the borrow op -- confirm
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e205m45x2e198m1/freeze.h b/src/Specific/solinas64_2e205m45x2e198m1/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e205m45x2e198m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e206m5/femul.c b/src/Specific/solinas64_2e206m5/femul.c
new file mode 100644
index 000000000..8642d8816
--- /dev/null
+++ b/src/Specific/solinas64_2e206m5/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * femul: forms the 128-bit column sums of the limb vector (x5, x7, x9, x8)
+ * times (x11, x13, x15, x14), runs the carry chain, and writes the four
+ * resulting limbs to out[0..3]. out[3] originates from the x5*x11 column,
+ * out[0] from the last column of the chain.
+ */
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
+{
+  /* Column sums; x19 feeds the start of the carry chain, x16 its end. */
+  uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+  uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x5 * (0x2 * ((uint128_t)x8 * x14))));
+  uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x5 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+  uint128_t x19 = (((uint128_t)x5 * x11) + (0x5 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+
+  /* Carry chain with alternating 52- and 51-bit limbs. */
+  uint64_t x20 = (uint64_t)(x19 >> 52);
+  uint64_t x21 = (uint64_t)x19 & 0xfffffffffffff;
+  uint128_t x22 = x20 + x18;
+  uint64_t x23 = (uint64_t)(x22 >> 51);
+  uint64_t x24 = (uint64_t)x22 & 0x7ffffffffffff;
+  uint128_t x25 = x23 + x17;
+  uint64_t x26 = (uint64_t)(x25 >> 52);
+  uint64_t x27 = (uint64_t)x25 & 0xfffffffffffff;
+  uint128_t x28 = x26 + x16;
+  uint64_t x29 = (uint64_t)(x28 >> 51);
+  uint64_t x30 = (uint64_t)x28 & 0x7ffffffffffff;
+
+  /* The carry out of the last column re-enters the first limb scaled by 0x5. */
+  uint64_t x31 = x21 + (0x5 * x29);
+  uint64_t x32 = x31 >> 52;
+  uint64_t x33 = x31 & 0xfffffffffffff;
+  uint64_t x34 = x32 + x24;
+  uint64_t x35 = x34 >> 51;
+  uint64_t x36 = x34 & 0x7ffffffffffff;
+
+  out[0] = x30;
+  out[1] = x35 + x27;
+  out[2] = x36;
+  out[3] = x33;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e206m5/femul.h b/src/Specific/solinas64_2e206m5/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e206m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e206m5/fesquare.c b/src/Specific/solinas64_2e206m5/fesquare.c
new file mode 100644
index 000000000..c3a9b9c6f
--- /dev/null
+++ b/src/Specific/solinas64_2e206m5/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * fesquare: forms the 128-bit column sums of the limb vector (x2, x4, x6, x5)
+ * multiplied by itself, runs the carry chain, and writes the four resulting
+ * limbs to out[0..3]. out[3] originates from the x2*x2 column, out[0] from
+ * the last column of the chain.
+ */
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  /* Column sums; x10 feeds the start of the carry chain, x7 its end. */
+  uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+  uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x5 * (0x2 * ((uint128_t)x5 * x5))));
+  uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+  uint128_t x10 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+
+  /* Carry chain with alternating 52- and 51-bit limbs. */
+  uint64_t x11 = (uint64_t)(x10 >> 52);
+  uint64_t x12 = (uint64_t)x10 & 0xfffffffffffff;
+  uint128_t x13 = x11 + x9;
+  uint64_t x14 = (uint64_t)(x13 >> 51);
+  uint64_t x15 = (uint64_t)x13 & 0x7ffffffffffff;
+  uint128_t x16 = x14 + x8;
+  uint64_t x17 = (uint64_t)(x16 >> 52);
+  uint64_t x18 = (uint64_t)x16 & 0xfffffffffffff;
+  uint128_t x19 = x17 + x7;
+  uint64_t x20 = (uint64_t)(x19 >> 51);
+  uint64_t x21 = (uint64_t)x19 & 0x7ffffffffffff;
+
+  /* The carry out of the last column re-enters the first limb scaled by 0x5. */
+  uint64_t x22 = x12 + (0x5 * x20);
+  uint64_t x23 = x22 >> 52;
+  uint64_t x24 = x22 & 0xfffffffffffff;
+  uint64_t x25 = x23 + x15;
+  uint64_t x26 = x25 >> 51;
+  uint64_t x27 = x25 & 0x7ffffffffffff;
+
+  out[0] = x21;
+  out[1] = x26 + x18;
+  out[2] = x27;
+  out[3] = x24;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e206m5/fesquare.h b/src/Specific/solinas64_2e206m5/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e206m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e206m5/freeze.c b/src/Specific/solinas64_2e206m5/freeze.c
new file mode 100644
index 000000000..3b2652885
--- /dev/null
+++ b/src/Specific/solinas64_2e206m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * freeze: reduces the 4-limb value (x2 least significant, then x4, x6, x5;
+ * limb widths 52, 51, 52, 51 bits) to its canonical representative modulo
+ * 2^206 - 5 and stores it in out[0..3], with out[3] the least significant
+ * limb.
+ *
+ * The body previously emitted here was not C: it had no opening brace and
+ * contained the unlowered term "Op Syntax.SubWithGetBorrow 52 ...". This is a
+ * hand-written, branch-free replacement: subtract the modulus with a borrow
+ * chain, then add it back under a mask when the subtraction went negative.
+ *
+ * NOTE(review): assumes every input limb is below 2^width (fully carried);
+ * the limb order and widths follow femul.c in this directory. This code did
+ * not come from the generator -- confirm, and regenerate when possible.
+ */
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  const uint64_t mask52 = 0xfffffffffffff; /* 2^52 - 1: limbs 0 and 2 */
+  const uint64_t mask51 = 0x7ffffffffffff; /* 2^51 - 1: limbs 1 and 3 */
+  const uint64_t p0 = 0xffffffffffffb;     /* 2^52 - 5: modulus limb 0 */
+
+  /* x - p limb by limb; bit 63 of each wrapped difference is the borrow. */
+  uint64_t d0 = x2 - p0;
+  uint64_t d1 = x4 - mask51 - (d0 >> 63);
+  uint64_t d2 = x6 - mask52 - (d1 >> 63);
+  uint64_t d3 = x5 - mask51 - (d2 >> 63);
+
+  /* All ones when the final borrow is set (x < p), zero otherwise. */
+  uint64_t addback = (uint64_t)0 - (d3 >> 63);
+
+  /* Conditionally add p back, propagating carries between limbs. */
+  uint64_t r0 = (d0 & mask52) + (p0 & addback);
+  uint64_t r1 = (d1 & mask51) + (mask51 & addback) + (r0 >> 52);
+  uint64_t r2 = (d2 & mask52) + (mask52 & addback) + (r1 >> 51);
+  uint64_t r3 = (d3 & mask51) + (mask51 & addback) + (r2 >> 52);
+
+  out[0] = r3 & mask51;
+  out[1] = r2 & mask52;
+  out[2] = r1 & mask51;
+  out[3] = r0 & mask52;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e206m5/freeze.h b/src/Specific/solinas64_2e206m5/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e206m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e212m29/femul.c b/src/Specific/solinas64_2e212m29/femul.c
new file mode 100644
index 000000000..24bb4cc1f
--- /dev/null
+++ b/src/Specific/solinas64_2e212m29/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * femul: forms the 128-bit column sums of the limb vector (x5, x7, x9, x8)
+ * times (x11, x13, x15, x14), runs the carry chain, and writes the four
+ * resulting limbs to out[0..3]. out[3] originates from the x5*x11 column,
+ * out[0] from the last column of the chain.
+ */
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
+{
+  /* Column sums; x19 feeds the start of the carry chain, x16 its end. */
+  uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+  uint128_t x17 = ((((uint128_t)x5 * x15) + (((uint128_t)x7 * x13) + ((uint128_t)x9 * x11))) + (0x1d * ((uint128_t)x8 * x14)));
+  uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1d * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+  uint128_t x19 = (((uint128_t)x5 * x11) + (0x1d * (((uint128_t)x7 * x14) + (((uint128_t)x9 * x15) + ((uint128_t)x8 * x13)))));
+
+  /* Carry chain: keep the low 53 bits of each column, push the rest upward. */
+  uint64_t x20 = (uint64_t)(x19 >> 53);
+  uint64_t x21 = (uint64_t)x19 & 0x1fffffffffffff;
+  uint128_t x22 = x20 + x18;
+  uint64_t x23 = (uint64_t)(x22 >> 53);
+  uint64_t x24 = (uint64_t)x22 & 0x1fffffffffffff;
+  uint128_t x25 = x23 + x17;
+  uint64_t x26 = (uint64_t)(x25 >> 53);
+  uint64_t x27 = (uint64_t)x25 & 0x1fffffffffffff;
+  uint128_t x28 = x26 + x16;
+  uint64_t x29 = (uint64_t)(x28 >> 53);
+  uint64_t x30 = (uint64_t)x28 & 0x1fffffffffffff;
+
+  /* The carry out of the last column re-enters the first limb scaled by 0x1d. */
+  uint64_t x31 = x21 + (0x1d * x29);
+  uint64_t x32 = x31 >> 53;
+  uint64_t x33 = x31 & 0x1fffffffffffff;
+  uint64_t x34 = x32 + x24;
+  uint64_t x35 = x34 >> 53;
+  uint64_t x36 = x34 & 0x1fffffffffffff;
+
+  out[0] = x30;
+  out[1] = x35 + x27;
+  out[2] = x36;
+  out[3] = x33;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e212m29/femul.h b/src/Specific/solinas64_2e212m29/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e212m29/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e212m29/fesquare.c b/src/Specific/solinas64_2e212m29/fesquare.c
new file mode 100644
index 000000000..7c4f36e46
--- /dev/null
+++ b/src/Specific/solinas64_2e212m29/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * fesquare: forms the 128-bit column sums of the limb vector (x2, x4, x6, x5)
+ * multiplied by itself, runs the carry chain, and writes the four resulting
+ * limbs to out[0..3]. out[3] originates from the x2*x2 column, out[0] from
+ * the last column of the chain.
+ */
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  /* Column sums; x10 feeds the start of the carry chain, x7 its end. */
+  uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2))));
+  uint128_t x8 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1d * ((uint128_t)x5 * x5)));
+  uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1d * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+  uint128_t x10 = (((uint128_t)x2 * x2) + (0x1d * (((uint128_t)x4 * x5) + (((uint128_t)x6 * x6) + ((uint128_t)x5 * x4)))));
+
+  /* Carry chain: keep the low 53 bits of each column, push the rest upward. */
+  uint64_t x11 = (uint64_t)(x10 >> 53);
+  uint64_t x12 = (uint64_t)x10 & 0x1fffffffffffff;
+  uint128_t x13 = x11 + x9;
+  uint64_t x14 = (uint64_t)(x13 >> 53);
+  uint64_t x15 = (uint64_t)x13 & 0x1fffffffffffff;
+  uint128_t x16 = x14 + x8;
+  uint64_t x17 = (uint64_t)(x16 >> 53);
+  uint64_t x18 = (uint64_t)x16 & 0x1fffffffffffff;
+  uint128_t x19 = x17 + x7;
+  uint64_t x20 = (uint64_t)(x19 >> 53);
+  uint64_t x21 = (uint64_t)x19 & 0x1fffffffffffff;
+
+  /* The carry out of the last column re-enters the first limb scaled by 0x1d. */
+  uint64_t x22 = x12 + (0x1d * x20);
+  uint64_t x23 = x22 >> 53;
+  uint64_t x24 = x22 & 0x1fffffffffffff;
+  uint64_t x25 = x23 + x15;
+  uint64_t x26 = x25 >> 53;
+  uint64_t x27 = x25 & 0x1fffffffffffff;
+
+  out[0] = x21;
+  out[1] = x26 + x18;
+  out[2] = x27;
+  out[3] = x24;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e212m29/fesquare.h b/src/Specific/solinas64_2e212m29/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e212m29/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e212m29/freeze.c b/src/Specific/solinas64_2e212m29/freeze.c
new file mode 100644
index 000000000..c5b0cf10e
--- /dev/null
+++ b/src/Specific/solinas64_2e212m29/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * freeze: reduces the 4-limb value (x2 least significant, then x4, x6, x5;
+ * 53 bits per limb) to its canonical representative modulo 2^212 - 29 and
+ * stores it in out[0..3], with out[3] the least significant limb.
+ *
+ * The body previously emitted here was not C: it had no opening brace and
+ * contained the unlowered term "Op Syntax.SubWithGetBorrow 53 ...". This is a
+ * hand-written, branch-free replacement: subtract the modulus with a borrow
+ * chain, then add it back under a mask when the subtraction went negative.
+ *
+ * NOTE(review): assumes every input limb is below 2^53 (fully carried); the
+ * limb order and widths follow femul.c in this directory. This code did not
+ * come from the generator -- confirm, and regenerate when possible.
+ */
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  const uint64_t mask = 0x1fffffffffffff; /* 2^53 - 1: limb mask, also modulus limbs 1..3 */
+  const uint64_t p0 = 0x1fffffffffffe3;   /* 2^53 - 29: modulus limb 0 */
+
+  /* x - p limb by limb; bit 63 of each wrapped difference is the borrow. */
+  uint64_t d0 = x2 - p0;
+  uint64_t d1 = x4 - mask - (d0 >> 63);
+  uint64_t d2 = x6 - mask - (d1 >> 63);
+  uint64_t d3 = x5 - mask - (d2 >> 63);
+
+  /* All ones when the final borrow is set (x < p), zero otherwise. */
+  uint64_t addback = (uint64_t)0 - (d3 >> 63);
+
+  /* Conditionally add p back, propagating carries between limbs. */
+  uint64_t r0 = (d0 & mask) + (p0 & addback);
+  uint64_t r1 = (d1 & mask) + (mask & addback) + (r0 >> 53);
+  uint64_t r2 = (d2 & mask) + (mask & addback) + (r1 >> 53);
+  uint64_t r3 = (d3 & mask) + (mask & addback) + (r2 >> 53);
+
+  out[0] = r3 & mask;
+  out[1] = r2 & mask;
+  out[2] = r1 & mask;
+  out[3] = r0 & mask;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e212m29/freeze.h b/src/Specific/solinas64_2e212m29/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e212m29/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e213m3/femul.c b/src/Specific/solinas64_2e213m3/femul.c
new file mode 100644
index 000000000..5ce881fe6
--- /dev/null
+++ b/src/Specific/solinas64_2e213m3/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * femul: forms the 128-bit column sums of the limb vector (x5, x7, x9, x8)
+ * times (x11, x13, x15, x14), runs the carry chain, and writes the four
+ * resulting limbs to out[0..3]. out[3] originates from the x5*x11 column,
+ * out[0] from the last column of the chain.
+ */
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
+{
+  /* Column sums; x19 feeds the start of the carry chain, x16 its end. */
+  uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11))));
+  uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x3 * ((uint128_t)x8 * x14)));
+  uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x3 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+  uint128_t x19 = (((uint128_t)x5 * x11) + (0x3 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13))))));
+
+  /* Carry chain: a 54-bit first limb followed by three 53-bit limbs. */
+  uint64_t x20 = (uint64_t)(x19 >> 54);
+  uint64_t x21 = (uint64_t)x19 & 0x3fffffffffffff;
+  uint128_t x22 = x20 + x18;
+  uint64_t x23 = (uint64_t)(x22 >> 53);
+  uint64_t x24 = (uint64_t)x22 & 0x1fffffffffffff;
+  uint128_t x25 = x23 + x17;
+  uint64_t x26 = (uint64_t)(x25 >> 53);
+  uint64_t x27 = (uint64_t)x25 & 0x1fffffffffffff;
+  uint128_t x28 = x26 + x16;
+  uint64_t x29 = (uint64_t)(x28 >> 53);
+  uint64_t x30 = (uint64_t)x28 & 0x1fffffffffffff;
+
+  /* The carry out of the last column re-enters the first limb scaled by 0x3. */
+  uint64_t x31 = x21 + (0x3 * x29);
+  uint64_t x32 = x31 >> 54;
+  uint64_t x33 = x31 & 0x3fffffffffffff;
+  uint64_t x34 = x32 + x24;
+  uint64_t x35 = x34 >> 53;
+  uint64_t x36 = x34 & 0x1fffffffffffff;
+
+  out[0] = x30;
+  out[1] = x35 + x27;
+  out[2] = x36;
+  out[3] = x33;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e213m3/femul.h b/src/Specific/solinas64_2e213m3/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e213m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e213m3/fesquare.c b/src/Specific/solinas64_2e213m3/fesquare.c
new file mode 100644
index 000000000..1ba8671f7
--- /dev/null
+++ b/src/Specific/solinas64_2e213m3/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * fesquare: forms the 128-bit column sums of the limb vector (x2, x4, x6, x5)
+ * multiplied by itself, runs the carry chain, and writes the four resulting
+ * limbs to out[0..3]. out[3] originates from the x2*x2 column, out[0] from
+ * the last column of the chain.
+ */
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  /* Column sums; x10 feeds the start of the carry chain, x7 its end. */
+  uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2))));
+  uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((uint128_t)x5 * x5)));
+  uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+  uint128_t x10 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4))))));
+
+  /* Carry chain: a 54-bit first limb followed by three 53-bit limbs. */
+  uint64_t x11 = (uint64_t)(x10 >> 54);
+  uint64_t x12 = (uint64_t)x10 & 0x3fffffffffffff;
+  uint128_t x13 = x11 + x9;
+  uint64_t x14 = (uint64_t)(x13 >> 53);
+  uint64_t x15 = (uint64_t)x13 & 0x1fffffffffffff;
+  uint128_t x16 = x14 + x8;
+  uint64_t x17 = (uint64_t)(x16 >> 53);
+  uint64_t x18 = (uint64_t)x16 & 0x1fffffffffffff;
+  uint128_t x19 = x17 + x7;
+  uint64_t x20 = (uint64_t)(x19 >> 53);
+  uint64_t x21 = (uint64_t)x19 & 0x1fffffffffffff;
+
+  /* The carry out of the last column re-enters the first limb scaled by 0x3. */
+  uint64_t x22 = x12 + (0x3 * x20);
+  uint64_t x23 = x22 >> 54;
+  uint64_t x24 = x22 & 0x3fffffffffffff;
+  uint64_t x25 = x23 + x15;
+  uint64_t x26 = x25 >> 53;
+  uint64_t x27 = x25 & 0x1fffffffffffff;
+
+  out[0] = x21;
+  out[1] = x26 + x18;
+  out[2] = x27;
+  out[3] = x24;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e213m3/fesquare.h b/src/Specific/solinas64_2e213m3/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e213m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e213m3/freeze.c b/src/Specific/solinas64_2e213m3/freeze.c
new file mode 100644
index 000000000..65f4d878d
--- /dev/null
+++ b/src/Specific/solinas64_2e213m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * freeze: reduces the 4-limb value (x2 least significant, then x4, x6, x5;
+ * limb widths 54, 53, 53, 53 bits) to its canonical representative modulo
+ * 2^213 - 3 and stores it in out[0..3], with out[3] the least significant
+ * limb.
+ *
+ * The body previously emitted here was not C: it had no opening brace and
+ * contained the unlowered term "Op Syntax.SubWithGetBorrow 54 ...". This is a
+ * hand-written, branch-free replacement: subtract the modulus with a borrow
+ * chain, then add it back under a mask when the subtraction went negative.
+ *
+ * NOTE(review): assumes every input limb is below 2^width (fully carried);
+ * the limb order and widths follow femul.c in this directory. This code did
+ * not come from the generator -- confirm, and regenerate when possible.
+ */
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2)
+{
+  const uint64_t mask54 = 0x3fffffffffffff; /* 2^54 - 1: limb 0 */
+  const uint64_t mask53 = 0x1fffffffffffff; /* 2^53 - 1: limbs 1..3 */
+  const uint64_t p0 = 0x3ffffffffffffd;     /* 2^54 - 3: modulus limb 0 */
+
+  /* x - p limb by limb; bit 63 of each wrapped difference is the borrow. */
+  uint64_t d0 = x2 - p0;
+  uint64_t d1 = x4 - mask53 - (d0 >> 63);
+  uint64_t d2 = x6 - mask53 - (d1 >> 63);
+  uint64_t d3 = x5 - mask53 - (d2 >> 63);
+
+  /* All ones when the final borrow is set (x < p), zero otherwise. */
+  uint64_t addback = (uint64_t)0 - (d3 >> 63);
+
+  /* Conditionally add p back, propagating carries between limbs. */
+  uint64_t r0 = (d0 & mask54) + (p0 & addback);
+  uint64_t r1 = (d1 & mask53) + (mask53 & addback) + (r0 >> 54);
+  uint64_t r2 = (d2 & mask53) + (mask53 & addback) + (r1 >> 53);
+  uint64_t r3 = (d3 & mask53) + (mask53 & addback) + (r2 >> 53);
+
+  out[0] = r3 & mask53;
+  out[1] = r2 & mask53;
+  out[2] = r1 & mask53;
+  out[3] = r0 & mask54;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e213m3/freeze.h b/src/Specific/solinas64_2e213m3/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e213m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e216m2e108m1/femul.c b/src/Specific/solinas64_2e216m2e108m1/femul.c
new file mode 100644
index 000000000..59d5d9612
--- /dev/null
+++ b/src/Specific/solinas64_2e216m2e108m1/femul.c
@@ -0,0 +1,59 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/*
+ * femul: combines the limb vectors (x5, x7, x9, x8) and (x11, x13, x15, x14)
+ * into 128-bit partial sums (several are formed as a product of limb sums
+ * minus a plain product), carries them in 54-bit steps, and writes four
+ * limbs to out[0..3].
+ */
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11)
+{
+  /* Products of limb sums with the low-half products subtracted. */
+  uint128_t x16 = (((uint128_t)(x7 + x8) * (x13 + x14)) - ((uint128_t)x7 * x13));
+  uint128_t x17 = ((((uint128_t)(x5 + x9) * (x13 + x14)) + ((uint128_t)(x7 + x8) * (x11 + x15))) - (((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)));
+  uint128_t x18 = (((uint128_t)(x5 + x9) * (x11 + x15)) - ((uint128_t)x5 * x11));
+
+  /* Accumulators that enter the carry steps below. */
+  uint128_t x19 = (((((uint128_t)x7 * x13) + ((uint128_t)x8 * x14)) + x18) + x16);
+  uint128_t x20 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15)));
+  uint128_t x21 = ((((uint128_t)x5 * x11) + ((uint128_t)x9 * x15)) + x16);
+
+  /* First round: split x20 and x17 at bit 54 and fold the high parts in. */
+  uint64_t x22 = (uint64_t)(x20 >> 54);
+  uint64_t x23 = (uint64_t)x20 & 0x3fffffffffffff;
+  uint64_t x24 = (uint64_t)(x17 >> 54);
+  uint64_t x25 = (uint64_t)x17 & 0x3fffffffffffff;
+  uint128_t x26 = (((uint128_t)0x40000000000000 * x24) + x25);
+  uint64_t x27 = (uint64_t)(x26 >> 54);
+  uint64_t x28 = (uint64_t)x26 & 0x3fffffffffffff;
+  uint128_t x29 = ((x22 + x19) + x27);
+  uint64_t x30 = (uint64_t)(x29 >> 54);
+  uint64_t x31 = (uint64_t)x29 & 0x3fffffffffffff;
+  uint128_t x32 = x21 + x27;
+  uint64_t x33 = (uint64_t)(x32 >> 54);
+  uint64_t x34 = (uint64_t)x32 & 0x3fffffffffffff;
+
+  /* Second round: the same split-and-fold on 64-bit values. */
+  uint64_t x35 = x30 + x28;
+  uint64_t x36 = x35 >> 54;
+  uint64_t x37 = x35 & 0x3fffffffffffff;
+  uint64_t x38 = x33 + x23;
+  uint64_t x39 = x38 >> 54;
+  uint64_t x40 = x38 & 0x3fffffffffffff;
+  uint64_t x41 = ((0x40000000000000 * x36) + x37);
+  uint64_t x42 = x41 >> 54;
+  uint64_t x43 = x41 & 0x3fffffffffffff;
+  uint64_t x44 = ((x39 + x31) + x42);
+  uint64_t x45 = x44 >> 54;
+  uint64_t x46 = x44 & 0x3fffffffffffff;
+  uint64_t x47 = x34 + x42;
+  uint64_t x48 = x47 >> 54;
+  uint64_t x49 = x47 & 0x3fffffffffffff;
+
+  out[0] = x45 + x43;
+  out[1] = x46;
+  out[2] = x48 + x40;
+  out[3] = x49;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e216m2e108m1/femul.h b/src/Specific/solinas64_2e216m2e108m1/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e216m2e108m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e216m2e108m1/fesquare.c b/src/Specific/solinas64_2e216m2e108m1/fesquare.c
new file mode 100644
index 000000000..90792a951
--- /dev/null
+++ b/src/Specific/solinas64_2e216m2e108m1/fesquare.c
@@ -0,0 +1,59 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes four words to out[0..3] computed from the four input words. NOTE(review): presumably squaring mod 2^216 - 2^108 - 1 (from the directory name) - confirm.
+{ uint128_t x7 = (((uint128_t)(x4 + x5) * (x4 + x5)) - ((uint128_t)x4 * x4)); // (x4+x5)^2 - x4^2; the sums are formed in uint64_t before the widening cast
+{ uint128_t x8 = ((((uint128_t)(x2 + x6) * (x4 + x5)) + ((uint128_t)(x4 + x5) * (x2 + x6))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2))); // 2*(x2+x6)*(x4+x5) - 2*x2*x4
+{ uint128_t x9 = (((uint128_t)(x2 + x6) * (x2 + x6)) - ((uint128_t)x2 * x2)); // (x2+x6)^2 - x2^2
+{ uint128_t x10 = (((((uint128_t)x4 * x4) + ((uint128_t)x5 * x5)) + x9) + x7);
+{ uint128_t x11 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))); // 2*x2*x4 + 2*x5*x6
+{ uint128_t x12 = ((((uint128_t)x2 * x2) + ((uint128_t)x6 * x6)) + x7);
+{ uint64_t x13 = (uint64_t) (x11 >> 0x36); // bits of x11 above the low 54 (0x36 == 54), truncated to 64 bits
+{ uint64_t x14 = ((uint64_t)x11 & 0x3fffffffffffff); // low 54 bits of x11
+{ uint64_t x15 = (uint64_t) (x8 >> 0x36);
+{ uint64_t x16 = ((uint64_t)x8 & 0x3fffffffffffff);
+{ uint128_t x17 = (((uint128_t)0x40000000000000 * x15) + x16); // 2^54 * x15 + x16: the two parts of x8 put back together
+{ uint64_t x18 = (uint64_t) (x17 >> 0x36);
+{ uint64_t x19 = ((uint64_t)x17 & 0x3fffffffffffff);
+{ uint128_t x20 = ((x13 + x10) + x18); // x18 is added here and again into x23 below
+{ uint64_t x21 = (uint64_t) (x20 >> 0x36);
+{ uint64_t x22 = ((uint64_t)x20 & 0x3fffffffffffff);
+{ uint128_t x23 = (x12 + x18);
+{ uint64_t x24 = (uint64_t) (x23 >> 0x36);
+{ uint64_t x25 = ((uint64_t)x23 & 0x3fffffffffffff);
+{ uint64_t x26 = (x21 + x19); // from here on every operation is plain 64-bit arithmetic
+{ uint64_t x27 = (x26 >> 0x36);
+{ uint64_t x28 = (x26 & 0x3fffffffffffff);
+{ uint64_t x29 = (x24 + x14);
+{ uint64_t x30 = (x29 >> 0x36);
+{ uint64_t x31 = (x29 & 0x3fffffffffffff);
+{ uint64_t x32 = ((0x40000000000000 * x27) + x28); // 2^54 * x27 + x28: the two parts of x26 put back together
+{ uint64_t x33 = (x32 >> 0x36);
+{ uint64_t x34 = (x32 & 0x3fffffffffffff);
+{ uint64_t x35 = ((x30 + x22) + x33); // x33 is added here and again into x38 below
+{ uint64_t x36 = (x35 >> 0x36);
+{ uint64_t x37 = (x35 & 0x3fffffffffffff);
+{ uint64_t x38 = (x25 + x33);
+{ uint64_t x39 = (x38 >> 0x36);
+{ uint64_t x40 = (x38 & 0x3fffffffffffff);
+out[0] = x36 + x34; // stored as a plain sum; no further shift/mask is applied
+out[1] = x37;
+out[2] = x39 + x31; // stored as a plain sum; no further shift/mask is applied
+out[3] = x40;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e216m2e108m1/fesquare.h b/src/Specific/solinas64_2e216m2e108m1/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e216m2e108m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e216m2e108m1/freeze.c b/src/Specific/solinas64_2e216m2e108m1/freeze.c
new file mode 100644
index 000000000..61713cf58
--- /dev/null
+++ b/src/Specific/solinas64_2e216m2e108m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and two right-hand sides are not C expressions.
+out[0] = uint64_t x8; // right-hand side is a declaration, not an expression
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 54 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // looks like an unprinted generator term - TODO regenerate rather than hand-edit
+out[2] = x2;
+out[3] = 0x3fffffffffffff;; // doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e216m2e108m1/freeze.h b/src/Specific/solinas64_2e216m2e108m1/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e216m2e108m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e221m3/femul.c b/src/Specific/solinas64_2e221m3/femul.c
new file mode 100644
index 000000000..d792750e6
--- /dev/null
+++ b/src/Specific/solinas64_2e221m3/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Writes four words to out[0..3]; every product pairs one of x8,x9,x7,x5 with one of x14,x15,x13,x11. NOTE(review): presumably multiplication mod 2^221 - 3 (from the directory name) - confirm.
+{ uint128_t x16 = (((uint128_t)x5 * x14) + ((0x2 * ((uint128_t)x7 * x15)) + ((0x2 * ((uint128_t)x9 * x13)) + ((uint128_t)x8 * x11)))); // x5*x14 + 2*x7*x15 + 2*x9*x13 + x8*x11
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x3 * ((uint128_t)x8 * x14)));
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x3 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x3 * ((0x2 * ((uint128_t)x7 * x14)) + ((0x2 * ((uint128_t)x9 * x15)) + (0x2 * ((uint128_t)x8 * x13)))))); // x5*x11 + 6*(x7*x14 + x9*x15 + x8*x13)
+{ uint64_t x20 = (uint64_t) (x19 >> 0x38); // bits of x19 above the low 56 (0x38 == 56), truncated to 64 bits
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff); // low 56 bits of x19
+{ uint128_t x22 = (x20 + x18);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x37); // 0x37 == 55
+{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffffff);
+{ uint128_t x25 = (x23 + x17);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x37);
+{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x37);
+{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
+{ uint64_t x31 = (x21 + (0x3 * x29)); // the part shifted out of x28 is multiplied by 3 and added to x21, in 64-bit arithmetic
+{ uint64_t x32 = (x31 >> 0x38);
+{ uint64_t x33 = (x31 & 0xffffffffffffff);
+{ uint64_t x34 = (x32 + x24);
+{ uint64_t x35 = (x34 >> 0x37);
+{ uint64_t x36 = (x34 & 0x7fffffffffffff);
+out[0] = x30;
+out[1] = x35 + x27; // stored as a plain sum; no further shift/mask is applied
+out[2] = x36;
+out[3] = x33;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e221m3/femul.h b/src/Specific/solinas64_2e221m3/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e221m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e221m3/fesquare.c b/src/Specific/solinas64_2e221m3/fesquare.c
new file mode 100644
index 000000000..6448c6975
--- /dev/null
+++ b/src/Specific/solinas64_2e221m3/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes four words to out[0..3] computed from the four input words. NOTE(review): presumably squaring mod 2^221 - 3 (from the directory name) - confirm.
+{ uint128_t x7 = (((uint128_t)x2 * x5) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x5 * x2)))); // 2*x2*x5 + 4*x4*x6
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((uint128_t)x5 * x5)));
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x5)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x5 * x4)))))); // x2*x2 + 6*(x4*x5 + x6*x6 + x5*x4)
+{ uint64_t x11 = (uint64_t) (x10 >> 0x38); // bits of x10 above the low 56 (0x38 == 56), truncated to 64 bits
+{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffffff); // low 56 bits of x10
+{ uint128_t x13 = (x11 + x9);
+{ uint64_t x14 = (uint64_t) (x13 >> 0x37); // 0x37 == 55
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
+{ uint128_t x16 = (x14 + x8);
+{ uint64_t x17 = (uint64_t) (x16 >> 0x37);
+{ uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffffff);
+{ uint128_t x19 = (x17 + x7);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x37);
+{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);
+{ uint64_t x22 = (x12 + (0x3 * x20)); // the part shifted out of x19 is multiplied by 3 and added to x12, in 64-bit arithmetic
+{ uint64_t x23 = (x22 >> 0x38);
+{ uint64_t x24 = (x22 & 0xffffffffffffff);
+{ uint64_t x25 = (x23 + x15);
+{ uint64_t x26 = (x25 >> 0x37);
+{ uint64_t x27 = (x25 & 0x7fffffffffffff);
+out[0] = x21;
+out[1] = x26 + x18; // stored as a plain sum; no further shift/mask is applied
+out[2] = x27;
+out[3] = x24;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e221m3/fesquare.h b/src/Specific/solinas64_2e221m3/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e221m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e221m3/freeze.c b/src/Specific/solinas64_2e221m3/freeze.c
new file mode 100644
index 000000000..eee23e3d9
--- /dev/null
+++ b/src/Specific/solinas64_2e221m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and two right-hand sides are not C expressions.
+out[0] = uint64_t x8; // right-hand side is a declaration, not an expression
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // looks like an unprinted generator term - TODO regenerate rather than hand-edit
+out[2] = x2;
+out[3] = 0xfffffffffffffd;; // doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e221m3/freeze.h b/src/Specific/solinas64_2e221m3/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e221m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e222m117/femul.c b/src/Specific/solinas64_2e222m117/femul.c
new file mode 100644
index 000000000..de30ee59e
--- /dev/null
+++ b/src/Specific/solinas64_2e222m117/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Writes four words to out[0..3]; every product pairs one of x8,x9,x7,x5 with one of x14,x15,x13,x11. NOTE(review): presumably multiplication mod 2^222 - 117 (from the directory name) - confirm.
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x75 * (0x2 * ((uint128_t)x8 * x14)))); // 0x75 == 117
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x75 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x75 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+{ uint128_t x20 = (x19 >> 0x38); // bits of x19 above the low 56 (0x38 == 56); kept at 128 bits here
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff); // low 56 bits of x19
+{ uint128_t x22 = (x20 + x18);
+{ uint128_t x23 = (x22 >> 0x37); // 0x37 == 55
+{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffffff);
+{ uint128_t x25 = (x23 + x17);
+{ uint128_t x26 = (x25 >> 0x38);
+{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x37);
+{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
+{ uint128_t x31 = (x21 + ((uint128_t)0x75 * x29)); // the part shifted out of x28 is multiplied by 117 and added to x21, in 128-bit arithmetic
+{ uint64_t x32 = (uint64_t) (x31 >> 0x38);
+{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+{ uint64_t x34 = (x32 + x24);
+{ uint64_t x35 = (x34 >> 0x37);
+{ uint64_t x36 = (x34 & 0x7fffffffffffff);
+out[0] = x30;
+out[1] = x35 + x27; // stored as a plain sum; no further shift/mask is applied
+out[2] = x36;
+out[3] = x33;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e222m117/femul.h b/src/Specific/solinas64_2e222m117/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e222m117/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e222m117/fesquare.c b/src/Specific/solinas64_2e222m117/fesquare.c
new file mode 100644
index 000000000..36b9552e9
--- /dev/null
+++ b/src/Specific/solinas64_2e222m117/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes four words to out[0..3] computed from the four input words. NOTE(review): presumably squaring mod 2^222 - 117 (from the directory name) - confirm.
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // 2*x2*x5 + 2*x4*x6
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x75 * (0x2 * ((uint128_t)x5 * x5)))); // 0x75 == 117
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x75 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x75 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+{ uint128_t x11 = (x10 >> 0x38); // bits of x10 above the low 56 (0x38 == 56); kept at 128 bits here
+{ uint64_t x12 = ((uint64_t)x10 & 0xffffffffffffff); // low 56 bits of x10
+{ uint128_t x13 = (x11 + x9);
+{ uint128_t x14 = (x13 >> 0x37); // 0x37 == 55
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffffff);
+{ uint128_t x16 = (x14 + x8);
+{ uint128_t x17 = (x16 >> 0x38);
+{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff);
+{ uint128_t x19 = (x17 + x7);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x37);
+{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);
+{ uint128_t x22 = (x12 + ((uint128_t)0x75 * x20)); // the part shifted out of x19 is multiplied by 117 and added to x12, in 128-bit arithmetic
+{ uint64_t x23 = (uint64_t) (x22 >> 0x38);
+{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+{ uint64_t x25 = (x23 + x15);
+{ uint64_t x26 = (x25 >> 0x37);
+{ uint64_t x27 = (x25 & 0x7fffffffffffff);
+out[0] = x21;
+out[1] = x26 + x18; // stored as a plain sum; no further shift/mask is applied
+out[2] = x27;
+out[3] = x24;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e222m117/fesquare.h b/src/Specific/solinas64_2e222m117/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e222m117/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e222m117/freeze.c b/src/Specific/solinas64_2e222m117/freeze.c
new file mode 100644
index 000000000..100ce761c
--- /dev/null
+++ b/src/Specific/solinas64_2e222m117/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and two right-hand sides are not C expressions.
+out[0] = uint64_t x8; // right-hand side is a declaration, not an expression
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // looks like an unprinted generator term - TODO regenerate rather than hand-edit
+out[2] = x2;
+out[3] = 0xffffffffffff8b;; // doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e222m117/freeze.h b/src/Specific/solinas64_2e222m117/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e222m117/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e224m2e96p1/freeze.c b/src/Specific/solinas64_2e224m2e96p1/freeze.c
new file mode 100644
index 000000000..576552ae8
--- /dev/null
+++ b/src/Specific/solinas64_2e224m2e96p1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and two right-hand sides are not C expressions.
+out[0] = uint64_t x8; // right-hand side is a declaration, not an expression
+out[1] = ℤ x9 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 3 Syntax.TWord 6 Syntax.TZ 0x0; // 'ℤ' is not a C type and the rest looks like an unprinted generator term - TODO regenerate rather than hand-edit
+out[2] = x2;
+out[3] = 0x1;; // doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e224m2e96p1/freeze.h b/src/Specific/solinas64_2e224m2e96p1/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e224m2e96p1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e226m5/femul.c b/src/Specific/solinas64_2e226m5/femul.c
new file mode 100644
index 000000000..5caa29c35
--- /dev/null
+++ b/src/Specific/solinas64_2e226m5/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Writes four words to out[0..3]; every product pairs one of x8,x9,x7,x5 with one of x14,x15,x13,x11. NOTE(review): presumably multiplication mod 2^226 - 5 (from the directory name) - confirm.
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x5 * (0x2 * ((uint128_t)x8 * x14))));
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x5 * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x5 * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+{ uint64_t x20 = (uint64_t) (x19 >> 0x39); // bits of x19 above the low 57 (0x39 == 57), truncated to 64 bits
+{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff); // low 57 bits of x19
+{ uint128_t x22 = (x20 + x18);
+{ uint128_t x23 = (x22 >> 0x38); // 0x38 == 56; kept at 128 bits here
+{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+{ uint128_t x25 = (x23 + x17);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x39);
+{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x38);
+{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+{ uint128_t x31 = (x21 + ((uint128_t)0x5 * x29)); // the part shifted out of x28 is multiplied by 5 and added to x21, in 128-bit arithmetic
+{ uint64_t x32 = (uint64_t) (x31 >> 0x39);
+{ uint64_t x33 = ((uint64_t)x31 & 0x1ffffffffffffff);
+{ uint64_t x34 = (x32 + x24);
+{ uint64_t x35 = (x34 >> 0x38);
+{ uint64_t x36 = (x34 & 0xffffffffffffff);
+out[0] = x30;
+out[1] = x35 + x27; // stored as a plain sum; no further shift/mask is applied
+out[2] = x36;
+out[3] = x33;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e226m5/femul.h b/src/Specific/solinas64_2e226m5/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e226m5/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e226m5/fesquare.c b/src/Specific/solinas64_2e226m5/fesquare.c
new file mode 100644
index 000000000..eb4aa2ef1
--- /dev/null
+++ b/src/Specific/solinas64_2e226m5/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes four words to out[0..3] computed from the four input words. NOTE(review): presumably squaring mod 2^226 - 5 (from the directory name) - confirm.
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // 2*x2*x5 + 2*x4*x6
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x5 * (0x2 * ((uint128_t)x5 * x5))));
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x5 * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x5 * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+{ uint64_t x11 = (uint64_t) (x10 >> 0x39); // bits of x10 above the low 57 (0x39 == 57), truncated to 64 bits
+{ uint64_t x12 = ((uint64_t)x10 & 0x1ffffffffffffff); // low 57 bits of x10
+{ uint128_t x13 = (x11 + x9);
+{ uint128_t x14 = (x13 >> 0x38); // 0x38 == 56; kept at 128 bits here
+{ uint64_t x15 = ((uint64_t)x13 & 0xffffffffffffff);
+{ uint128_t x16 = (x14 + x8);
+{ uint64_t x17 = (uint64_t) (x16 >> 0x39);
+{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
+{ uint128_t x19 = (x17 + x7);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x38);
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+{ uint128_t x22 = (x12 + ((uint128_t)0x5 * x20)); // the part shifted out of x19 is multiplied by 5 and added to x12, in 128-bit arithmetic
+{ uint64_t x23 = (uint64_t) (x22 >> 0x39);
+{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
+{ uint64_t x25 = (x23 + x15);
+{ uint64_t x26 = (x25 >> 0x38);
+{ uint64_t x27 = (x25 & 0xffffffffffffff);
+out[0] = x21;
+out[1] = x26 + x18; // stored as a plain sum; no further shift/mask is applied
+out[2] = x27;
+out[3] = x24;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e226m5/fesquare.h b/src/Specific/solinas64_2e226m5/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e226m5/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e226m5/freeze.c b/src/Specific/solinas64_2e226m5/freeze.c
new file mode 100644
index 000000000..86cb034c4
--- /dev/null
+++ b/src/Specific/solinas64_2e226m5/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C - there is no opening '{' and two right-hand sides are not C expressions.
+out[0] = uint64_t x8; // right-hand side is a declaration, not an expression
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // looks like an unprinted generator term - TODO regenerate rather than hand-edit
+out[2] = x2;
+out[3] = 0x1fffffffffffffb;; // doubled ';'
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e226m5/freeze.h b/src/Specific/solinas64_2e226m5/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e226m5/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e230m27/femul.c b/src/Specific/solinas64_2e230m27/femul.c
new file mode 100644
index 000000000..c031d85b1
--- /dev/null
+++ b/src/Specific/solinas64_2e230m27/femul.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11) // Writes four words to out[0..3]; every product pairs one of x8,x9,x7,x5 with one of x14,x15,x13,x11. NOTE(review): presumably multiplication mod 2^230 - 27 (from the directory name) - confirm.
+{ uint128_t x16 = (((uint128_t)x5 * x14) + (((uint128_t)x7 * x15) + (((uint128_t)x9 * x13) + ((uint128_t)x8 * x11))));
+{ uint128_t x17 = ((((uint128_t)x5 * x15) + ((0x2 * ((uint128_t)x7 * x13)) + ((uint128_t)x9 * x11))) + (0x1b * (0x2 * ((uint128_t)x8 * x14)))); // 0x1b == 27
+{ uint128_t x18 = ((((uint128_t)x5 * x13) + ((uint128_t)x7 * x11)) + (0x1b * (((uint128_t)x9 * x14) + ((uint128_t)x8 * x15))));
+{ uint128_t x19 = (((uint128_t)x5 * x11) + (0x1b * ((0x2 * ((uint128_t)x7 * x14)) + (((uint128_t)x9 * x15) + (0x2 * ((uint128_t)x8 * x13))))));
+{ uint128_t x20 = (x19 >> 0x3a); // bits of x19 above the low 58 (0x3a == 58); kept at 128 bits here
+{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffffff); // low 58 bits of x19
+{ uint128_t x22 = (x20 + x18);
+{ uint128_t x23 = (x22 >> 0x39); // 0x39 == 57
+{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
+{ uint128_t x25 = (x23 + x17);
+{ uint128_t x26 = (x25 >> 0x3a);
+{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x39);
+{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
+{ uint128_t x31 = (x21 + ((uint128_t)0x1b * x29)); // the part shifted out of x28 is multiplied by 27 and added to x21, in 128-bit arithmetic
+{ uint64_t x32 = (uint64_t) (x31 >> 0x3a);
+{ uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffffff);
+{ uint64_t x34 = (x32 + x24);
+{ uint64_t x35 = (x34 >> 0x39);
+{ uint64_t x36 = (x34 & 0x1ffffffffffffff);
+out[0] = x30;
+out[1] = x35 + x27; // stored as a plain sum; no further shift/mask is applied
+out[2] = x36;
+out[3] = x33;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e230m27/femul.h b/src/Specific/solinas64_2e230m27/femul.h
new file mode 100644
index 000000000..b19a382e5
--- /dev/null
+++ b/src/Specific/solinas64_2e230m27/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x8, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11);
diff --git a/src/Specific/solinas64_2e230m27/fesquare.c b/src/Specific/solinas64_2e230m27/fesquare.c
new file mode 100644
index 000000000..8e75c2873
--- /dev/null
+++ b/src/Specific/solinas64_2e230m27/fesquare.c
@@ -0,0 +1,46 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // Writes four words to out[0..3] computed from the four input words. NOTE(review): presumably squaring mod 2^230 - 27 (from the directory name) - confirm.
+{ uint128_t x7 = (((uint128_t)x2 * x5) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x5 * x2)))); // 2*x2*x5 + 2*x4*x6
+{ uint128_t x8 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1b * (0x2 * ((uint128_t)x5 * x5)))); // 0x1b == 27
+{ uint128_t x9 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1b * (((uint128_t)x6 * x5) + ((uint128_t)x5 * x6))));
+{ uint128_t x10 = (((uint128_t)x2 * x2) + (0x1b * ((0x2 * ((uint128_t)x4 * x5)) + (((uint128_t)x6 * x6) + (0x2 * ((uint128_t)x5 * x4))))));
+{ uint128_t x11 = (x10 >> 0x3a); // bits of x10 above the low 58 (0x3a == 58); kept at 128 bits here
+{ uint64_t x12 = ((uint64_t)x10 & 0x3ffffffffffffff); // low 58 bits of x10
+{ uint128_t x13 = (x11 + x9);
+{ uint128_t x14 = (x13 >> 0x39); // 0x39 == 57
+{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff);
+{ uint128_t x16 = (x14 + x8);
+{ uint128_t x17 = (x16 >> 0x3a);
+{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
+{ uint128_t x19 = (x17 + x7);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x39);
+{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff);
+{ uint128_t x22 = (x12 + ((uint128_t)0x1b * x20)); // the part shifted out of x19 is multiplied by 27 and added to x12, in 128-bit arithmetic
+{ uint64_t x23 = (uint64_t) (x22 >> 0x3a);
+{ uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffffff);
+{ uint64_t x25 = (x23 + x15);
+{ uint64_t x26 = (x25 >> 0x39);
+{ uint64_t x27 = (x25 & 0x1ffffffffffffff);
+out[0] = x21;
+out[1] = x26 + x18; // stored as a plain sum; no further shift/mask is applied
+out[2] = x27;
+out[3] = x24;
+}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e230m27/fesquare.h b/src/Specific/solinas64_2e230m27/fesquare.h
new file mode 100644
index 000000000..9ba53be31
--- /dev/null
+++ b/src/Specific/solinas64_2e230m27/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; force_inline is the always_inline attribute macro #defined directly above. The definition in the accompanying fesquare.c stores out[0..3].
diff --git a/src/Specific/solinas64_2e230m27/freeze.c b/src/Specific/solinas64_2e230m27/freeze.c
new file mode 100644
index 000000000..a85af082f
--- /dev/null
+++ b/src/Specific/solinas64_2e230m27/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace and the right-hand sides are declarations / raw "Op Syntax.*" terms; looks like the code generator could not print this function -- confirm and regenerate.
+out[0] = uint64_t x8; // a declaration used as an expression; x8 is never defined in this function
+out[1] = uint8_t x9 = Op Syntax.SubWithGetBorrow 58 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw operator dump; presumably a 58-bit subtract-with-borrow with borrow-in 0x0 -- confirm
+out[2] = x2;
+out[3] = 0x3ffffffffffffe5;; // 0x3ffffffffffffe5 == 2^58 - 27; the doubled semicolon is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e230m27/freeze.h b/src/Specific/solinas64_2e230m27/freeze.h
new file mode 100644
index 000000000..aa9b11f23
--- /dev/null
+++ b/src/Specific/solinas64_2e230m27/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x5, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only. NOTE(review): the freeze.c added alongside this header does not contain a valid C definition of this function -- confirm before relying on it.
diff --git a/src/Specific/solinas64_2e235m15/femul.c b/src/Specific/solinas64_2e235m15/femul.c
new file mode 100644
index 000000000..84bb1392e
--- /dev/null
+++ b/src/Specific/solinas64_2e235m15/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Stores five words to out[0..4]. Every product pairs one of (x10, x11, x9, x7, x5) with one of (x18, x19, x17, x15, x13); presumably a field multiplication mod 2^235 - 15 (per the directory name) with x5 and x13 the lowest limbs -- confirm.
+{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13))))); // accumulator that ends up in out[0]; none of its terms is scaled by 0xf
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0xf * ((uint128_t)x10 * x18))); // 0xf == 15 scales the x10*x18 term
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0xf * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0xf * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0xf * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15)))))); // the only accumulator containing x5*x13
+{ uint64_t x25 = (uint64_t) (x24 >> 0x2f); // bits of x24 above the low 47 (0x2f == 47), truncated to 64 bits
+{ uint64_t x26 = ((uint64_t)x24 & 0x7fffffffffff); // low 47 bits of x24
+{ uint128_t x27 = (x25 + x23); // carry x25 is added into the next accumulator
+{ uint64_t x28 = (uint64_t) (x27 >> 0x2f);
+{ uint64_t x29 = ((uint64_t)x27 & 0x7fffffffffff);
+{ uint128_t x30 = (x28 + x22);
+{ uint64_t x31 = (uint64_t) (x30 >> 0x2f);
+{ uint64_t x32 = ((uint64_t)x30 & 0x7fffffffffff);
+{ uint128_t x33 = (x31 + x21);
+{ uint64_t x34 = (uint64_t) (x33 >> 0x2f);
+{ uint64_t x35 = ((uint64_t)x33 & 0x7fffffffffff);
+{ uint128_t x36 = (x34 + x20);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x2f);
+{ uint64_t x38 = ((uint64_t)x36 & 0x7fffffffffff);
+{ uint64_t x39 = (x26 + (0xf * x37)); // top carry x37 re-enters the lowest word multiplied by 15, in 64-bit arithmetic
+{ uint64_t x40 = (x39 >> 0x2f);
+{ uint64_t x41 = (x39 & 0x7fffffffffff);
+{ uint64_t x42 = (x40 + x29);
+{ uint64_t x43 = (x42 >> 0x2f);
+{ uint64_t x44 = (x42 & 0x7fffffffffff);
+out[0] = x38; // low 47 bits of x36 (the x20 chain)
+out[1] = x35;
+out[2] = x43 + x32; // x43 is added in without a further mask
+out[3] = x44;
+out[4] = x41; // low 47 bits of x39 (the x24 chain plus the folded-back carry)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e235m15/femul.h b/src/Specific/solinas64_2e235m15/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e235m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype only; force_inline is the always_inline attribute macro #defined directly above. The definition in the accompanying femul.c stores out[0..4].
diff --git a/src/Specific/solinas64_2e235m15/fesquare.c b/src/Specific/solinas64_2e235m15/fesquare.c
new file mode 100644
index 000000000..edd311a16
--- /dev/null
+++ b/src/Specific/solinas64_2e235m15/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores five words to out[0..4], built from 128-bit products of the five inputs with each other; presumably a field squaring mod 2^235 - 15 (per the directory name) with x2 the lowest limb -- confirm.
+{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2))))); // accumulator that ends up in out[0]; none of its terms is scaled by 0xf
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0xf * ((uint128_t)x7 * x7))); // 0xf == 15 scales the x7*x7 term
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0xf * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0xf * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4)))))); // the only accumulator containing x2*x2
+{ uint64_t x14 = (uint64_t) (x13 >> 0x2f); // bits of x13 above the low 47 (0x2f == 47), truncated to 64 bits
+{ uint64_t x15 = ((uint64_t)x13 & 0x7fffffffffff); // low 47 bits of x13
+{ uint128_t x16 = (x14 + x12); // carry x14 is added into the next accumulator
+{ uint64_t x17 = (uint64_t) (x16 >> 0x2f);
+{ uint64_t x18 = ((uint64_t)x16 & 0x7fffffffffff);
+{ uint128_t x19 = (x17 + x11);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x2f);
+{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffff);
+{ uint128_t x22 = (x20 + x10);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x2f);
+{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);
+{ uint128_t x25 = (x23 + x9);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x2f);
+{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffff);
+{ uint64_t x28 = (x15 + (0xf * x26)); // top carry x26 re-enters the lowest word multiplied by 15, in 64-bit arithmetic
+{ uint64_t x29 = (x28 >> 0x2f);
+{ uint64_t x30 = (x28 & 0x7fffffffffff);
+{ uint64_t x31 = (x29 + x18);
+{ uint64_t x32 = (x31 >> 0x2f);
+{ uint64_t x33 = (x31 & 0x7fffffffffff);
+out[0] = x27; // low 47 bits of x25 (the x9 chain)
+out[1] = x24;
+out[2] = x32 + x21; // x32 is added in without a further mask
+out[3] = x33;
+out[4] = x30; // low 47 bits of x28 (the x13 chain plus the folded-back carry)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e235m15/fesquare.h b/src/Specific/solinas64_2e235m15/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e235m15/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; force_inline is the always_inline attribute macro #defined directly above. The definition in the accompanying fesquare.c stores out[0..4].
diff --git a/src/Specific/solinas64_2e235m15/freeze.c b/src/Specific/solinas64_2e235m15/freeze.c
new file mode 100644
index 000000000..6b21e6e37
--- /dev/null
+++ b/src/Specific/solinas64_2e235m15/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace and the right-hand sides are declarations / raw "Op Syntax.*" terms; looks like the code generator could not print this function -- confirm and regenerate. Only four out[] stores appear although five input words are taken.
+out[0] = uint64_t x10; // a declaration used as an expression; x10 is never defined in this function
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 47 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw operator dump; presumably a 47-bit subtract-with-borrow with borrow-in 0x0 -- confirm
+out[2] = x2;
+out[3] = 0x7ffffffffff1;; // 0x7ffffffffff1 == 2^47 - 15; the doubled semicolon is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e235m15/freeze.h b/src/Specific/solinas64_2e235m15/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e235m15/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only. NOTE(review): the freeze.c added alongside this header does not contain a valid C definition of this function -- confirm before relying on it.
diff --git a/src/Specific/solinas64_2e243m9/femul.c b/src/Specific/solinas64_2e243m9/femul.c
new file mode 100644
index 000000000..2a5ad5f77
--- /dev/null
+++ b/src/Specific/solinas64_2e243m9/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // Stores six words to out[0..5]. Every product pairs one of (x12, x13, x11, x9, x7, x5) with one of (x22, x23, x21, x19, x17, x15); presumably a field multiplication mod 2^243 - 9 (per the directory name) with x5 and x15 the lowest limbs -- confirm.
+{ uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15)))))); // accumulator that ends up in out[0]; none of its terms is scaled by 0x9 or 0x2
+{ uint128_t x25 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + (((uint128_t)x9 * x19) + ((0x2 * ((uint128_t)x11 * x17)) + ((uint128_t)x13 * x15))))) + (0x9 * (0x2 * ((uint128_t)x12 * x22)))); // some terms carry an extra factor 0x2; the x12*x22 term is scaled by 0x9 as well
+{ uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x9 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
+{ uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0x9 * ((0x2 * ((uint128_t)x11 * x22)) + (((uint128_t)x13 * x23) + (0x2 * ((uint128_t)x12 * x21))))));
+{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x9 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+{ uint128_t x29 = (((uint128_t)x5 * x15) + (0x9 * ((0x2 * ((uint128_t)x7 * x22)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + (((uint128_t)x13 * x19) + (0x2 * ((uint128_t)x12 * x17)))))))); // the only accumulator containing x5*x15
+{ uint64_t x30 = (uint64_t) (x29 >> 0x29); // bits of x29 above the low 41 (0x29 == 41), truncated to 64 bits
+{ uint64_t x31 = ((uint64_t)x29 & 0x1ffffffffff); // low 41 bits of x29
+{ uint128_t x32 = (x30 + x28); // carry x30 is added into the next accumulator
+{ uint64_t x33 = (uint64_t) (x32 >> 0x28); // 0x28 == 40; the chain alternates 41- and 40-bit splits
+{ uint64_t x34 = ((uint64_t)x32 & 0xffffffffff); // low 40 bits of x32
+{ uint128_t x35 = (x33 + x27);
+{ uint64_t x36 = (uint64_t) (x35 >> 0x29);
+{ uint64_t x37 = ((uint64_t)x35 & 0x1ffffffffff);
+{ uint128_t x38 = (x36 + x26);
+{ uint64_t x39 = (uint64_t) (x38 >> 0x28);
+{ uint64_t x40 = ((uint64_t)x38 & 0xffffffffff);
+{ uint128_t x41 = (x39 + x25);
+{ uint64_t x42 = (uint64_t) (x41 >> 0x29);
+{ uint64_t x43 = ((uint64_t)x41 & 0x1ffffffffff);
+{ uint128_t x44 = (x42 + x24);
+{ uint64_t x45 = (uint64_t) (x44 >> 0x28);
+{ uint64_t x46 = ((uint64_t)x44 & 0xffffffffff);
+{ uint64_t x47 = (x31 + (0x9 * x45)); // top carry x45 re-enters the lowest word multiplied by 9, in 64-bit arithmetic
+{ uint64_t x48 = (x47 >> 0x29);
+{ uint64_t x49 = (x47 & 0x1ffffffffff);
+{ uint64_t x50 = (x48 + x34);
+{ uint64_t x51 = (x50 >> 0x28);
+{ uint64_t x52 = (x50 & 0xffffffffff);
+out[0] = x46; // low 40 bits of x44 (the x24 chain)
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37; // x51 is added in without a further mask
+out[4] = x52;
+out[5] = x49; // low 41 bits of x47 (the x29 chain plus the folded-back carry)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas64_2e243m9/femul.h b/src/Specific/solinas64_2e243m9/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas64_2e243m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Prototype only; force_inline is the always_inline attribute macro #defined directly above. The definition in the accompanying femul.c stores out[0..5].
diff --git a/src/Specific/solinas64_2e243m9/fesquare.c b/src/Specific/solinas64_2e243m9/fesquare.c
new file mode 100644
index 000000000..5c9ea6279
--- /dev/null
+++ b/src/Specific/solinas64_2e243m9/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores six words to out[0..5], built from 128-bit products of the six inputs with each other; presumably a field squaring mod 2^243 - 9 (per the directory name) with x2 the lowest limb -- confirm.
+{ uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2)))))); // accumulator that ends up in out[0]; none of its terms is scaled by 0x9 or 0x2
+{ uint128_t x12 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x9 * (0x2 * ((uint128_t)x9 * x9)))); // some terms carry an extra factor 0x2; the x9*x9 term is scaled by 0x9 as well
+{ uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
+{ uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * ((0x2 * ((uint128_t)x8 * x9)) + (((uint128_t)x10 * x10) + (0x2 * ((uint128_t)x9 * x8))))));
+{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+{ uint128_t x16 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x9)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + (0x2 * ((uint128_t)x9 * x4)))))))); // the only accumulator containing x2*x2
+{ uint64_t x17 = (uint64_t) (x16 >> 0x29); // bits of x16 above the low 41 (0x29 == 41), truncated to 64 bits
+{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffff); // low 41 bits of x16
+{ uint128_t x19 = (x17 + x15); // carry x17 is added into the next accumulator
+{ uint64_t x20 = (uint64_t) (x19 >> 0x28); // 0x28 == 40; the chain alternates 41- and 40-bit splits
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffff); // low 40 bits of x19
+{ uint128_t x22 = (x20 + x14);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x29);
+{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffff);
+{ uint128_t x25 = (x23 + x13);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x28);
+{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffff);
+{ uint128_t x28 = (x26 + x12);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x29);
+{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffff);
+{ uint128_t x31 = (x29 + x11);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x28);
+{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffff);
+{ uint64_t x34 = (x18 + (0x9 * x32)); // top carry x32 re-enters the lowest word multiplied by 9, in 64-bit arithmetic
+{ uint64_t x35 = (x34 >> 0x29);
+{ uint64_t x36 = (x34 & 0x1ffffffffff);
+{ uint64_t x37 = (x35 + x21);
+{ uint64_t x38 = (x37 >> 0x28);
+{ uint64_t x39 = (x37 & 0xffffffffff);
+out[0] = x33; // low 40 bits of x31 (the x11 chain)
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // x38 is added in without a further mask
+out[4] = x39;
+out[5] = x36; // low 41 bits of x34 (the x16 chain plus the folded-back carry)
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas64_2e243m9/fesquare.h b/src/Specific/solinas64_2e243m9/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas64_2e243m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; force_inline is the always_inline attribute macro #defined directly above. The definition in the accompanying fesquare.c stores out[0..5].
diff --git a/src/Specific/solinas64_2e243m9/freeze.c b/src/Specific/solinas64_2e243m9/freeze.c
new file mode 100644
index 000000000..4d2465721
--- /dev/null
+++ b/src/Specific/solinas64_2e243m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace and the right-hand sides are declarations / raw "Op Syntax.*" terms; looks like the code generator could not print this function -- confirm and regenerate. Only four out[] stores appear although six input words are taken.
+out[0] = uint64_t x12; // a declaration used as an expression; x12 is never defined in this function
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 41 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw operator dump; presumably a 41-bit subtract-with-borrow with borrow-in 0x0 -- confirm
+out[2] = x2;
+out[3] = 0x1fffffffff7;; // 0x1fffffffff7 == 2^41 - 9; the doubled semicolon is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e243m9/freeze.h b/src/Specific/solinas64_2e243m9/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas64_2e243m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only. NOTE(review): the freeze.c added alongside this header does not contain a valid C definition of this function -- confirm before relying on it.
diff --git a/src/Specific/solinas64_2e251m9/femul.c b/src/Specific/solinas64_2e251m9/femul.c
new file mode 100644
index 000000000..8d52104c3
--- /dev/null
+++ b/src/Specific/solinas64_2e251m9/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Stores five words to out[0..4]. Every product pairs one of (x10, x11, x9, x7, x5) with one of (x18, x19, x17, x15, x13); presumably a field multiplication mod 2^251 - 9 (per the directory name) with x5 and x13 the lowest limbs -- confirm.
+{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13))))); // accumulator that ends up in out[0]; some terms carry a factor 0x2, none is scaled by 0x9
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x9 * ((uint128_t)x10 * x18))); // 0x9 scales the x10*x18 term
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x9 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x9 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x9 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15))))))); // the only accumulator containing x5*x13
+{ uint64_t x25 = (uint64_t) (x24 >> 0x33); // bits of x24 above the low 51 (0x33 == 51), truncated to 64 bits
+{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff); // low 51 bits of x24
+{ uint128_t x27 = (x25 + x23); // carry x25 is added into the next accumulator
+{ uint64_t x28 = (uint64_t) (x27 >> 0x32); // 0x32 == 50; every split after the first one is 50 bits wide
+{ uint64_t x29 = ((uint64_t)x27 & 0x3ffffffffffff); // low 50 bits of x27
+{ uint128_t x30 = (x28 + x22);
+{ uint64_t x31 = (uint64_t) (x30 >> 0x32);
+{ uint64_t x32 = ((uint64_t)x30 & 0x3ffffffffffff);
+{ uint128_t x33 = (x31 + x21);
+{ uint64_t x34 = (uint64_t) (x33 >> 0x32);
+{ uint64_t x35 = ((uint64_t)x33 & 0x3ffffffffffff);
+{ uint128_t x36 = (x34 + x20);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x32);
+{ uint64_t x38 = ((uint64_t)x36 & 0x3ffffffffffff);
+{ uint64_t x39 = (x26 + (0x9 * x37)); // top carry x37 re-enters the lowest word multiplied by 9, in 64-bit arithmetic
+{ uint64_t x40 = (x39 >> 0x33);
+{ uint64_t x41 = (x39 & 0x7ffffffffffff);
+{ uint64_t x42 = (x40 + x29);
+{ uint64_t x43 = (x42 >> 0x32);
+{ uint64_t x44 = (x42 & 0x3ffffffffffff);
+out[0] = x38; // low 50 bits of x36 (the x20 chain)
+out[1] = x35;
+out[2] = x43 + x32; // x43 is added in without a further mask
+out[3] = x44;
+out[4] = x41; // low 51 bits of x39 (the x24 chain plus the folded-back carry)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e251m9/femul.h b/src/Specific/solinas64_2e251m9/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e251m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype only; force_inline is the always_inline attribute macro #defined directly above. The definition in the accompanying femul.c stores out[0..4].
diff --git a/src/Specific/solinas64_2e251m9/fesquare.c b/src/Specific/solinas64_2e251m9/fesquare.c
new file mode 100644
index 000000000..8decd2e9c
--- /dev/null
+++ b/src/Specific/solinas64_2e251m9/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Stores five words to out[0..4], built from 128-bit products of the five inputs with each other; presumably a field squaring mod 2^251 - 9 (per the directory name) with x2 the lowest limb -- confirm.
+{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2))))); // accumulator that ends up in out[0]; some terms carry a factor 0x2, none is scaled by 0x9
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x9 * ((uint128_t)x7 * x7))); // 0x9 scales the x7*x7 term
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4))))))); // the only accumulator containing x2*x2
+{ uint64_t x14 = (uint64_t) (x13 >> 0x33); // bits of x13 above the low 51 (0x33 == 51), truncated to 64 bits
+{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff); // low 51 bits of x13
+{ uint128_t x16 = (x14 + x12); // carry x14 is added into the next accumulator
+{ uint64_t x17 = (uint64_t) (x16 >> 0x32); // 0x32 == 50; every split after the first one is 50 bits wide
+{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffff); // low 50 bits of x16
+{ uint128_t x19 = (x17 + x11);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x32);
+{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffff);
+{ uint128_t x22 = (x20 + x10);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x32);
+{ uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffff);
+{ uint128_t x25 = (x23 + x9);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x32);
+{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffff);
+{ uint64_t x28 = (x15 + (0x9 * x26)); // top carry x26 re-enters the lowest word multiplied by 9, in 64-bit arithmetic
+{ uint64_t x29 = (x28 >> 0x33);
+{ uint64_t x30 = (x28 & 0x7ffffffffffff);
+{ uint64_t x31 = (x29 + x18);
+{ uint64_t x32 = (x31 >> 0x32);
+{ uint64_t x33 = (x31 & 0x3ffffffffffff);
+out[0] = x27; // low 50 bits of x25 (the x9 chain)
+out[1] = x24;
+out[2] = x32 + x21; // x32 is added in without a further mask
+out[3] = x33;
+out[4] = x30; // low 51 bits of x28 (the x13 chain plus the folded-back carry)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e251m9/fesquare.h b/src/Specific/solinas64_2e251m9/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e251m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only; force_inline is the always_inline attribute macro #defined directly above. The definition in the accompanying fesquare.c stores out[0..4].
diff --git a/src/Specific/solinas64_2e251m9/freeze.c b/src/Specific/solinas64_2e251m9/freeze.c
new file mode 100644
index 000000000..01fb2e21f
--- /dev/null
+++ b/src/Specific/solinas64_2e251m9/freeze.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): only the first body line is C; the remaining lines are declarations / raw "Op Syntax.*" terms used as expressions, so this does not compile. Looks like the code generator could print the 51-bit step but not the following one -- confirm and regenerate.
+{ uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7fffffffffff7, &x10); // 0x7fffffffffff7 == 2^51 - 9; _subborrow_u51 is not defined in the visible part of this file, presumably it comes from liblow.h -- confirm
+out[0] = uint64_t x13; // a declaration used as an expression; x13 is never defined in this function
+out[1] = uint8_t x14 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 x11; // raw operator dump; presumably a 50-bit subtract-with-borrow taking x11 as borrow-in -- confirm
+out[2] = x4;
+out[3] = 0x3ffffffffffff;; // 0x3ffffffffffff == 2^50 - 1; the doubled semicolon is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e251m9/freeze.h b/src/Specific/solinas64_2e251m9/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e251m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only. NOTE(review): the freeze.c added alongside this header does not contain a valid C definition of this function -- confirm before relying on it.
diff --git a/src/Specific/solinas64_2e254m127x2e240m1/freeze.c b/src/Specific/solinas64_2e254m127x2e240m1/freeze.c
new file mode 100644
index 000000000..55d13c701
--- /dev/null
+++ b/src/Specific/solinas64_2e254m127x2e240m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace and the right-hand sides are declarations / raw "Op Syntax.*" terms; looks like the code generator could not print this function -- confirm and regenerate. Only four out[] stores appear although six input words are taken.
+out[0] = uint64_t x12; // a declaration used as an expression; x12 is never defined in this function
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 43 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw operator dump; presumably a 43-bit subtract-with-borrow with borrow-in 0x0 -- confirm
+out[2] = x2;
+out[3] = 0x7ffffffffff;; // 0x7ffffffffff == 2^43 - 1; the doubled semicolon is an empty statement
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e254m127x2e240m1/freeze.h b/src/Specific/solinas64_2e254m127x2e240m1/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas64_2e254m127x2e240m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype only. NOTE(review): the freeze.c added alongside this header does not contain a valid C definition of this function -- confirm before relying on it.
diff --git a/src/Specific/solinas64_2e255m19/femul.c b/src/Specific/solinas64_2e255m19/femul.c
new file mode 100644
index 000000000..6a3d29cd8
--- /dev/null
+++ b/src/Specific/solinas64_2e255m19/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Stores five words to out[0..4]. Every product pairs one of (x10, x11, x9, x7, x5) with one of (x18, x19, x17, x15, x13); presumably a field multiplication mod 2^255 - 19 (per the directory name) with x5 and x13 the lowest limbs -- confirm.
+{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13))))); // accumulator that ends up in out[0]; none of its terms is scaled by 0x13
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18))); // 0x13 == 19 scales the x10*x18 term
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15)))))); // the only accumulator containing x5*x13
+{ uint64_t x25 = (uint64_t) (x24 >> 0x33); // bits of x24 above the low 51 (0x33 == 51), truncated to 64 bits
+{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff); // low 51 bits of x24
+{ uint128_t x27 = (x25 + x23); // carry x25 is added into the next accumulator
+{ uint64_t x28 = (uint64_t) (x27 >> 0x33);
+{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+{ uint128_t x30 = (x28 + x22);
+{ uint64_t x31 = (uint64_t) (x30 >> 0x33);
+{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+{ uint128_t x33 = (x31 + x21);
+{ uint64_t x34 = (uint64_t) (x33 >> 0x33);
+{ uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
+{ uint128_t x36 = (x34 + x20);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x33);
+{ uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
+{ uint64_t x39 = (x26 + (0x13 * x37)); // top carry x37 re-enters the lowest word multiplied by 19, in 64-bit arithmetic
+{ uint64_t x40 = (x39 >> 0x33);
+{ uint64_t x41 = (x39 & 0x7ffffffffffff);
+{ uint64_t x42 = (x40 + x29);
+{ uint64_t x43 = (x42 >> 0x33);
+{ uint64_t x44 = (x42 & 0x7ffffffffffff);
+out[0] = x38; // low 51 bits of x36 (the x20 chain)
+out[1] = x35;
+out[2] = x43 + x32; // x43 is added in without a further mask
+out[3] = x44;
+out[4] = x41; // low 51 bits of x39 (the x24 chain plus the folded-back carry)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m19/femul.h b/src/Specific/solinas64_2e255m19/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e255m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // Prototype only; force_inline is the always_inline attribute macro #defined directly above. The definition in the accompanying femul.c stores out[0..4].
diff --git a/src/Specific/solinas64_2e255m19/fesquare.c b/src/Specific/solinas64_2e255m19/fesquare.c
new file mode 100644
index 000000000..628d9aaa7
--- /dev/null
+++ b/src/Specific/solinas64_2e255m19/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 5-limb value and stores 5 carried limbs in out. The product columns below imply the limb order x2 (weight 0), x4, x6, x8, x7 (weight 4).
+{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2))))); // weight-4 column: every product whose limb weights sum to 4
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7))); // weight-3 column; x7*x7 (weight 8) wraps around scaled by 0x13 (19)
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)))); // weight-2 column plus 0x13 times the wrapped weight-7 products
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))))); // weight-1 column plus 0x13 times the wrapped weight-6 products
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4)))))); // weight-0 column plus 0x13 times the wrapped weight-5 products
+{ uint64_t x14 = (uint64_t) (x13 >> 0x33); // carry: the bits of x13 above the low 0x33 (51), narrowed to 64 bits
+{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff); // low 51 bits of the weight-0 column
+{ uint128_t x16 = (x14 + x12); // carry moves into the weight-1 column
+{ uint64_t x17 = (uint64_t) (x16 >> 0x33);
+{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+{ uint128_t x19 = (x17 + x11); // carry moves into the weight-2 column
+{ uint64_t x20 = (uint64_t) (x19 >> 0x33);
+{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+{ uint128_t x22 = (x20 + x10); // carry moves into the weight-3 column
+{ uint64_t x23 = (uint64_t) (x22 >> 0x33);
+{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+{ uint128_t x25 = (x23 + x9); // carry moves into the weight-4 column
+{ uint64_t x26 = (uint64_t) (x25 >> 0x33); // carry out of the top column
+{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
+{ uint64_t x28 = (x15 + (0x13 * x26)); // top carry wraps to the lowest limb, scaled by 0x13; done in 64-bit arithmetic
+{ uint64_t x29 = (x28 >> 0x33);
+{ uint64_t x30 = (x28 & 0x7ffffffffffff);
+{ uint64_t x31 = (x29 + x18); // second, short carry pass into the weight-1 limb
+{ uint64_t x32 = (x31 >> 0x33);
+{ uint64_t x33 = (x31 & 0x7ffffffffffff);
+out[0] = x27; // out is written highest-weight limb first
+out[1] = x24;
+out[2] = x32 + x21; // the last carry is added here without re-masking
+out[3] = x33;
+out[4] = x30; // lowest-weight limb
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m19/fesquare.h b/src/Specific/solinas64_2e255m19/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e255m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e255m19/freeze.c b/src/Specific/solinas64_2e255m19/freeze.c
new file mode 100644
index 000000000..1cebfd785
--- /dev/null
+++ b/src/Specific/solinas64_2e255m19/freeze.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Runs a borrow chain subtracting the constant limbs 0x7ffffffffffed, 0x7ffffffffffff (x4) from x2, x4, x6, x8, x7, then a carry chain adding the same constants back under a mask derived from the final borrow. _subborrow_u51, _addcarryx_u51 and cmovznz are not defined in this file (presumably they come from liblow.h).
+{ uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7ffffffffffed, &x10); // chain starts at x2 with an incoming borrow of 0x0
+{ uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+{ uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+{ uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22); // x23 is the borrow out of the last limb
+{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); // mask is 0x0 or all-ones, selected by x23; presumably all-ones when x23 is nonzero — TODO confirm against cmovznz
+{ uint64_t x25 = (x24 & 0x7ffffffffffed); // masked copy of the first constant limb
+{ uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
+{ uint64_t x29 = (x24 & 0x7ffffffffffff);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+{ uint64_t x33 = (x24 & 0x7ffffffffffff);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+{ uint64_t x37 = (x24 & 0x7ffffffffffff);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+{ uint64_t x41 = (x24 & 0x7ffffffffffff);
+{ uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43); // the final carry is bound to `_` and never read
+out[0] = x43; // limb derived from x7
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // limb derived from x2
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m19/freeze.h b/src/Specific/solinas64_2e255m19/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e255m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/femul.c b/src/Specific/solinas64_2e255m2e4m2e1m1/femul.c
new file mode 100644
index 000000000..72c26597d
--- /dev/null
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/femul.c
@@ -0,0 +1,64 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Multiplies two 5-limb values and stores 5 carried limbs in out. The product columns below imply the limb orders x5, x7, x9, x11, x10 and x13, x15, x17, x19, x18 (weight 0 first).
+{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13))))); // weight-4 column
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (((uint128_t)x10 * x18) + ((0x2 * ((uint128_t)x10 * x18)) + (0x10 * ((uint128_t)x10 * x18))))); // weight-3 column; the wrapped product t is added as t + 0x2*t + 0x10*t, i.e. 19*t
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + ((((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)) + ((0x2 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))) + (0x10 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)))))); // weight-2 column, wrapped products again scaled by 1 + 0x2 + 0x10
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + ((((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))) + ((0x2 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))) + (0x10 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))))))); // weight-1 column
+{ uint128_t x24 = (((uint128_t)x5 * x13) + ((((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15)))) + ((0x2 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15))))) + (0x10 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15)))))))); // weight-0 column
+{ uint64_t x25 = (uint64_t) (x20 >> 0x33); // the top column is split first: bits above the low 0x33 (51), narrowed to 64 bits
+{ uint64_t x26 = ((uint64_t)x20 & 0x7ffffffffffff);
+{ uint128_t x27 = (((uint128_t)0x8000000000000 * x25) + x26); // recombines the two halves (0x8000000000000 == 1 << 0x33)
+{ uint64_t x28 = (uint64_t) (x27 >> 0x33);
+{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+{ uint128_t x30 = (((uint128_t)0x8000000000000 * x28) + x29); // same split-and-recombine step, repeated
+{ uint64_t x31 = (uint64_t) (x30 >> 0x33);
+{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+{ uint128_t x33 = (((uint128_t)0x8000000000000 * x31) + x32); // and repeated once more
+{ uint64_t x34 = (uint64_t) (x33 >> 0x33); // high part of the top column
+{ uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff); // low 51 bits of the top column
+{ uint128_t x36 = (x24 + (x34 + ((0x2 * x34) + (0x10 * x34)))); // high part of the top column wraps into the weight-0 column, scaled by 1 + 0x2 + 0x10
+{ uint64_t x37 = (uint64_t) (x36 >> 0x33);
+{ uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
+{ uint128_t x39 = (x37 + x23); // carry moves into the weight-1 column
+{ uint64_t x40 = (uint64_t) (x39 >> 0x33);
+{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
+{ uint128_t x42 = (x40 + x22); // carry moves into the weight-2 column
+{ uint64_t x43 = (uint64_t) (x42 >> 0x33);
+{ uint64_t x44 = ((uint64_t)x42 & 0x7ffffffffffff);
+{ uint128_t x45 = (x43 + x21); // carry moves into the weight-3 column
+{ uint64_t x46 = (uint64_t) (x45 >> 0x33);
+{ uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffff);
+{ uint64_t x48 = (x46 + x35); // carry joins the low part of the top column
+{ uint64_t x49 = (x48 >> 0x33);
+{ uint64_t x50 = (x48 & 0x7ffffffffffff);
+{ uint64_t x51 = (x38 + (x49 + ((0x2 * x49) + (0x10 * x49)))); // carry out of the top limb wraps to the lowest limb, scaled by 1 + 0x2 + 0x10
+{ uint64_t x52 = (x51 >> 0x33);
+{ uint64_t x53 = (x51 & 0x7ffffffffffff);
+{ uint64_t x54 = (x53 >> 0x33); // always 0: x53 was just masked to 51 bits
+{ uint64_t x55 = (x53 & 0x7ffffffffffff); // equal to x53 for the same reason
+{ uint64_t x56 = (x55 >> 0x33); // always 0 as well
+{ uint64_t x57 = (x55 & 0x7ffffffffffff);
+out[0] = x50; // out is written highest-weight limb first
+out[1] = x47;
+out[2] = x44;
+out[3] = x56 + x54 + x52 + x41; // carries are added here without re-masking
+out[4] = x57; // lowest-weight limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/femul.h b/src/Specific/solinas64_2e255m2e4m2e1m1/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c b/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c
new file mode 100644
index 000000000..29c0cc597
--- /dev/null
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.c
@@ -0,0 +1,64 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 5-limb value and stores 5 carried limbs in out. The product columns below imply the limb order x2 (weight 0), x4, x6, x8, x7 (weight 4).
+{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2))))); // weight-4 column
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x7 * x7) + ((0x2 * ((uint128_t)x7 * x7)) + (0x10 * ((uint128_t)x7 * x7))))); // weight-3 column; the wrapped product t is added as t + 0x2*t + 0x10*t, i.e. 19*t
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + ((((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)) + ((0x2 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))) + (0x10 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)))))); // weight-2 column, wrapped products again scaled by 1 + 0x2 + 0x10
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + ((((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))) + ((0x2 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))) + (0x10 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))))))); // weight-1 column
+{ uint128_t x13 = (((uint128_t)x2 * x2) + ((((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4)))) + ((0x2 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4))))) + (0x10 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4)))))))); // weight-0 column
+{ uint64_t x14 = (uint64_t) (x9 >> 0x33); // the top column is split first: bits above the low 0x33 (51), narrowed to 64 bits
+{ uint64_t x15 = ((uint64_t)x9 & 0x7ffffffffffff);
+{ uint128_t x16 = (((uint128_t)0x8000000000000 * x14) + x15); // recombines the two halves (0x8000000000000 == 1 << 0x33)
+{ uint64_t x17 = (uint64_t) (x16 >> 0x33);
+{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+{ uint128_t x19 = (((uint128_t)0x8000000000000 * x17) + x18); // same split-and-recombine step, repeated
+{ uint64_t x20 = (uint64_t) (x19 >> 0x33);
+{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+{ uint128_t x22 = (((uint128_t)0x8000000000000 * x20) + x21); // and repeated once more
+{ uint64_t x23 = (uint64_t) (x22 >> 0x33); // high part of the top column
+{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff); // low 51 bits of the top column
+{ uint128_t x25 = (x13 + (x23 + ((0x2 * x23) + (0x10 * x23)))); // high part of the top column wraps into the weight-0 column, scaled by 1 + 0x2 + 0x10
+{ uint64_t x26 = (uint64_t) (x25 >> 0x33);
+{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
+{ uint128_t x28 = (x26 + x12); // carry moves into the weight-1 column
+{ uint64_t x29 = (uint64_t) (x28 >> 0x33);
+{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffff);
+{ uint128_t x31 = (x29 + x11); // carry moves into the weight-2 column
+{ uint64_t x32 = (uint64_t) (x31 >> 0x33);
+{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
+{ uint128_t x34 = (x32 + x10); // carry moves into the weight-3 column
+{ uint64_t x35 = (uint64_t) (x34 >> 0x33);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+{ uint64_t x37 = (x35 + x24); // carry joins the low part of the top column
+{ uint64_t x38 = (x37 >> 0x33);
+{ uint64_t x39 = (x37 & 0x7ffffffffffff);
+{ uint64_t x40 = (x27 + (x38 + ((0x2 * x38) + (0x10 * x38)))); // carry out of the top limb wraps to the lowest limb, scaled by 1 + 0x2 + 0x10
+{ uint64_t x41 = (x40 >> 0x33);
+{ uint64_t x42 = (x40 & 0x7ffffffffffff);
+{ uint64_t x43 = (x42 >> 0x33); // always 0: x42 was just masked to 51 bits
+{ uint64_t x44 = (x42 & 0x7ffffffffffff); // equal to x42 for the same reason
+{ uint64_t x45 = (x44 >> 0x33); // always 0 as well
+{ uint64_t x46 = (x44 & 0x7ffffffffffff);
+out[0] = x39; // out is written highest-weight limb first
+out[1] = x36;
+out[2] = x33;
+out[3] = x45 + x43 + x41 + x30; // carries are added here without re-masking
+out[4] = x46; // lowest-weight limb
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.h b/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c b/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c
new file mode 100644
index 000000000..1cebfd785
--- /dev/null
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Runs a borrow chain subtracting the constant limbs 0x7ffffffffffed, 0x7ffffffffffff (x4) from x2, x4, x6, x8, x7, then a carry chain adding the same constants back under a mask derived from the final borrow. _subborrow_u51, _addcarryx_u51 and cmovznz are not defined in this file (presumably they come from liblow.h).
+{ uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7ffffffffffed, &x10); // chain starts at x2 with an incoming borrow of 0x0
+{ uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+{ uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+{ uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22); // x23 is the borrow out of the last limb
+{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); // mask is 0x0 or all-ones, selected by x23; presumably all-ones when x23 is nonzero — TODO confirm against cmovznz
+{ uint64_t x25 = (x24 & 0x7ffffffffffed); // masked copy of the first constant limb
+{ uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
+{ uint64_t x29 = (x24 & 0x7ffffffffffff);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+{ uint64_t x33 = (x24 & 0x7ffffffffffff);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+{ uint64_t x37 = (x24 & 0x7ffffffffffff);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+{ uint64_t x41 = (x24 & 0x7ffffffffffff);
+{ uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43); // the final carry is bound to `_` and never read
+out[0] = x43; // limb derived from x7
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // limb derived from x2
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.h b/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e255m2e4m2e1m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e255m765/femul.c b/src/Specific/solinas64_2e255m765/femul.c
new file mode 100644
index 000000000..010cb8eba
--- /dev/null
+++ b/src/Specific/solinas64_2e255m765/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Multiplies two 5-limb values and stores 5 carried limbs in out. The product columns below imply the limb orders x5, x7, x9, x11, x10 and x13, x15, x17, x19, x18 (weight 0 first).
+{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13))))); // weight-4 column
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x2fd * ((uint128_t)x10 * x18))); // weight-3 column; the weight-8 product wraps around scaled by 0x2fd (765)
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x2fd * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)))); // weight-2 column plus 0x2fd times the wrapped weight-7 products
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x2fd * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))))); // weight-1 column plus 0x2fd times the wrapped weight-6 products
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x2fd * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15)))))); // weight-0 column plus 0x2fd times the wrapped weight-5 products
+{ uint128_t x25 = (x24 >> 0x33); // carry above the low 0x33 (51) bits, kept at 128 bits rather than narrowed
+{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff);
+{ uint128_t x27 = (x25 + x23); // carry moves into the weight-1 column
+{ uint128_t x28 = (x27 >> 0x33);
+{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+{ uint128_t x30 = (x28 + x22); // carry moves into the weight-2 column
+{ uint128_t x31 = (x30 >> 0x33);
+{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+{ uint128_t x33 = (x31 + x21); // carry moves into the weight-3 column
+{ uint128_t x34 = (x33 >> 0x33);
+{ uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
+{ uint128_t x36 = (x34 + x20); // carry moves into the weight-4 column
+{ uint64_t x37 = (uint64_t) (x36 >> 0x33); // carry out of the top column, narrowed to 64 bits
+{ uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
+{ uint128_t x39 = (x26 + ((uint128_t)0x2fd * x37)); // top carry wraps to the lowest limb scaled by 0x2fd; the product is formed in 128 bits
+{ uint64_t x40 = (uint64_t) (x39 >> 0x33);
+{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
+{ uint64_t x42 = (x40 + x29); // second, short carry pass into the weight-1 limb
+{ uint64_t x43 = (x42 >> 0x33);
+{ uint64_t x44 = (x42 & 0x7ffffffffffff);
+out[0] = x38; // out is written highest-weight limb first
+out[1] = x35;
+out[2] = x43 + x32; // the last carry is added here without re-masking
+out[3] = x44;
+out[4] = x41; // lowest-weight limb
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m765/femul.h b/src/Specific/solinas64_2e255m765/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e255m765/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/solinas64_2e255m765/fesquare.c b/src/Specific/solinas64_2e255m765/fesquare.c
new file mode 100644
index 000000000..7a3de6799
--- /dev/null
+++ b/src/Specific/solinas64_2e255m765/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 5-limb value and stores 5 carried limbs in out. The product columns below imply the limb order x2 (weight 0), x4, x6, x8, x7 (weight 4).
+{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2))))); // weight-4 column
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x2fd * ((uint128_t)x7 * x7))); // weight-3 column; x7*x7 (weight 8) wraps around scaled by 0x2fd (765)
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x2fd * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)))); // weight-2 column plus 0x2fd times the wrapped weight-7 products
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x2fd * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))))); // weight-1 column plus 0x2fd times the wrapped weight-6 products
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x2fd * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4)))))); // weight-0 column plus 0x2fd times the wrapped weight-5 products
+{ uint128_t x14 = (x13 >> 0x33); // carry above the low 0x33 (51) bits, kept at 128 bits rather than narrowed
+{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffff);
+{ uint128_t x16 = (x14 + x12); // carry moves into the weight-1 column
+{ uint128_t x17 = (x16 >> 0x33);
+{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+{ uint128_t x19 = (x17 + x11); // carry moves into the weight-2 column
+{ uint128_t x20 = (x19 >> 0x33);
+{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+{ uint128_t x22 = (x20 + x10); // carry moves into the weight-3 column
+{ uint128_t x23 = (x22 >> 0x33);
+{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+{ uint128_t x25 = (x23 + x9); // carry moves into the weight-4 column
+{ uint64_t x26 = (uint64_t) (x25 >> 0x33); // carry out of the top column, narrowed to 64 bits
+{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
+{ uint128_t x28 = (x15 + ((uint128_t)0x2fd * x26)); // top carry wraps to the lowest limb scaled by 0x2fd; the product is formed in 128 bits
+{ uint64_t x29 = (uint64_t) (x28 >> 0x33);
+{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffff);
+{ uint64_t x31 = (x29 + x18); // second, short carry pass into the weight-1 limb
+{ uint64_t x32 = (x31 >> 0x33);
+{ uint64_t x33 = (x31 & 0x7ffffffffffff);
+out[0] = x27; // out is written highest-weight limb first
+out[1] = x24;
+out[2] = x32 + x21; // the last carry is added here without re-masking
+out[3] = x33;
+out[4] = x30; // lowest-weight limb
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m765/fesquare.h b/src/Specific/solinas64_2e255m765/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e255m765/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e255m765/freeze.c b/src/Specific/solinas64_2e255m765/freeze.c
new file mode 100644
index 000000000..0ae10d49e
--- /dev/null
+++ b/src/Specific/solinas64_2e255m765/freeze.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Runs a borrow chain subtracting the constant limbs 0x7fffffffffd03, 0x7ffffffffffff (x4) from x2, x4, x6, x8, x7, then a carry chain adding the same constants back under a mask derived from the final borrow. _subborrow_u51, _addcarryx_u51 and cmovznz are not defined in this file (presumably they come from liblow.h).
+{ uint64_t x10; uint8_t x11 = _subborrow_u51(0x0, x2, 0x7fffffffffd03, &x10); // chain starts at x2 with an incoming borrow of 0x0; 0x7fffffffffd03 == 0x8000000000000 - 0x2fd
+{ uint64_t x13; uint8_t x14 = _subborrow_u51(x11, x4, 0x7ffffffffffff, &x13);
+{ uint64_t x16; uint8_t x17 = _subborrow_u51(x14, x6, 0x7ffffffffffff, &x16);
+{ uint64_t x19; uint8_t x20 = _subborrow_u51(x17, x8, 0x7ffffffffffff, &x19);
+{ uint64_t x22; uint8_t x23 = _subborrow_u51(x20, x7, 0x7ffffffffffff, &x22); // x23 is the borrow out of the last limb
+{ uint64_t x24 = (uint64_t)cmovznz(x23, 0x0, 0xffffffffffffffffL); // mask is 0x0 or all-ones, selected by x23; presumably all-ones when x23 is nonzero — TODO confirm against cmovznz
+{ uint64_t x25 = (x24 & 0x7fffffffffd03); // masked copy of the first constant limb
+{ uint64_t x27; uint8_t x28 = _addcarryx_u51(0x0, x10, x25, &x27);
+{ uint64_t x29 = (x24 & 0x7ffffffffffff);
+{ uint64_t x31; uint8_t x32 = _addcarryx_u51(x28, x13, x29, &x31);
+{ uint64_t x33 = (x24 & 0x7ffffffffffff);
+{ uint64_t x35; uint8_t x36 = _addcarryx_u51(x32, x16, x33, &x35);
+{ uint64_t x37 = (x24 & 0x7ffffffffffff);
+{ uint64_t x39; uint8_t x40 = _addcarryx_u51(x36, x19, x37, &x39);
+{ uint64_t x41 = (x24 & 0x7ffffffffffff);
+{ uint64_t x43; uint8_t _ = _addcarryx_u51(x40, x22, x41, &x43); // the final carry is bound to `_` and never read
+out[0] = x43; // limb derived from x7
+out[1] = x39;
+out[2] = x35;
+out[3] = x31;
+out[4] = x27; // limb derived from x2
+}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e255m765/freeze.h b/src/Specific/solinas64_2e255m765/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e255m765/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e256m189/femul.c b/src/Specific/solinas64_2e256m189/femul.c
new file mode 100644
index 000000000..4c4e7a2e4
--- /dev/null
+++ b/src/Specific/solinas64_2e256m189/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // Multiplies two 5-limb values and stores 5 carried limbs in out. The product columns below imply the limb orders x5, x7, x9, x11, x10 and x13, x15, x17, x19, x18 (weight 0 first). The lowest limb carries out after 0x34 (52) bits, the other four after 0x33 (51).
+{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13))))); // weight-4 column; products not involving x5/x13 are doubled, presumably because their limb positions sum to one bit above this column — TODO confirm
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0xbd * ((uint128_t)x10 * x18))); // weight-3 column; the weight-8 product wraps around scaled by 0xbd (189)
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0xbd * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)))); // weight-2 column plus 0xbd times the wrapped weight-7 products
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0xbd * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))))); // weight-1 column plus 0xbd times the wrapped weight-6 products
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0xbd * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15))))))); // weight-0 column; every wrapped weight-5 product is doubled and then scaled by 0xbd
+{ uint128_t x25 = (x24 >> 0x34); // carry above the low 0x34 (52) bits, kept at 128 bits
+{ uint64_t x26 = ((uint64_t)x24 & 0xfffffffffffff); // low 52 bits of the weight-0 column
+{ uint128_t x27 = (x25 + x23); // carry moves into the weight-1 column
+{ uint64_t x28 = (uint64_t) (x27 >> 0x33); // from here on limbs are 0x33 (51) bits wide
+{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+{ uint128_t x30 = (x28 + x22); // carry moves into the weight-2 column
+{ uint64_t x31 = (uint64_t) (x30 >> 0x33);
+{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff);
+{ uint128_t x33 = (x31 + x21); // carry moves into the weight-3 column
+{ uint64_t x34 = (uint64_t) (x33 >> 0x33);
+{ uint64_t x35 = ((uint64_t)x33 & 0x7ffffffffffff);
+{ uint128_t x36 = (x34 + x20); // carry moves into the weight-4 column
+{ uint64_t x37 = (uint64_t) (x36 >> 0x33); // carry out of the top column
+{ uint64_t x38 = ((uint64_t)x36 & 0x7ffffffffffff);
+{ uint128_t x39 = (x26 + ((uint128_t)0xbd * x37)); // top carry wraps to the lowest limb scaled by 0xbd; the product is formed in 128 bits
+{ uint64_t x40 = (uint64_t) (x39 >> 0x34); // lowest limb again splits at 52 bits
+{ uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffff);
+{ uint64_t x42 = (x40 + x29); // second, short carry pass into the weight-1 limb
+{ uint64_t x43 = (x42 >> 0x33);
+{ uint64_t x44 = (x42 & 0x7ffffffffffff);
+out[0] = x38; // out is written highest-weight limb first
+out[1] = x35;
+out[2] = x43 + x32; // the last carry is added here without re-masking
+out[3] = x44;
+out[4] = x41; // lowest-weight limb (masked to 52 bits)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e256m189/femul.h b/src/Specific/solinas64_2e256m189/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e256m189/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13);
diff --git a/src/Specific/solinas64_2e256m189/fesquare.c b/src/Specific/solinas64_2e256m189/fesquare.c
new file mode 100644
index 000000000..cc8808dc7
--- /dev/null
+++ b/src/Specific/solinas64_2e256m189/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a 5-limb value and stores 5 carried limbs in out. The product columns below imply the limb order x2 (weight 0), x4, x6, x8, x7 (weight 4). The lowest limb carries out after 0x34 (52) bits, the other four after 0x33 (51).
+{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2))))); // weight-4 column; products not involving x2 are doubled, presumably because their limb positions sum to one bit above this column — TODO confirm
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xbd * ((uint128_t)x7 * x7))); // weight-3 column; x7*x7 (weight 8) wraps around scaled by 0xbd (189)
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xbd * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)))); // weight-2 column plus 0xbd times the wrapped weight-7 products
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xbd * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))))); // weight-1 column plus 0xbd times the wrapped weight-6 products
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0xbd * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4))))))); // weight-0 column; every wrapped weight-5 product is doubled and then scaled by 0xbd
+{ uint128_t x14 = (x13 >> 0x34); // carry above the low 0x34 (52) bits, kept at 128 bits
+{ uint64_t x15 = ((uint64_t)x13 & 0xfffffffffffff); // low 52 bits of the weight-0 column
+{ uint128_t x16 = (x14 + x12); // carry moves into the weight-1 column
+{ uint64_t x17 = (uint64_t) (x16 >> 0x33); // from here on limbs are 0x33 (51) bits wide
+{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+{ uint128_t x19 = (x17 + x11); // carry moves into the weight-2 column
+{ uint64_t x20 = (uint64_t) (x19 >> 0x33);
+{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff);
+{ uint128_t x22 = (x20 + x10); // carry moves into the weight-3 column
+{ uint64_t x23 = (uint64_t) (x22 >> 0x33);
+{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+{ uint128_t x25 = (x23 + x9); // carry moves into the weight-4 column
+{ uint64_t x26 = (uint64_t) (x25 >> 0x33); // carry out of the top column
+{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffff);
+{ uint128_t x28 = (x15 + ((uint128_t)0xbd * x26)); // top carry wraps to the lowest limb scaled by 0xbd; the product is formed in 128 bits
+{ uint64_t x29 = (uint64_t) (x28 >> 0x34); // lowest limb again splits at 52 bits
+{ uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
+{ uint64_t x31 = (x29 + x18); // second, short carry pass into the weight-1 limb
+{ uint64_t x32 = (x31 >> 0x33);
+{ uint64_t x33 = (x31 & 0x7ffffffffffff);
+out[0] = x27; // out is written highest-weight limb first
+out[1] = x24;
+out[2] = x32 + x21; // the last carry is added here without re-masking
+out[3] = x33;
+out[4] = x30; // lowest-weight limb (masked to 52 bits)
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e256m189/fesquare.h b/src/Specific/solinas64_2e256m189/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e256m189/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e256m189/freeze.c b/src/Specific/solinas64_2e256m189/freeze.c
new file mode 100644
index 000000000..2278942df
--- /dev/null
+++ b/src/Specific/solinas64_2e256m189/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the lines that follow are not valid C — the body has no opening brace and contains what looks like an unreduced code-generator term, so this file cannot compile as shown.
+out[0] = uint64_t x10; // a declaration in expression position — invalid C
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // "Op Syntax.SubWithGetBorrow 52 ..." is not C; presumably the generator had no C rendering for a 52-bit subtract-with-borrow — TODO confirm
+out[2] = x2;
+out[3] = 0xfffffffffff43;; // 0xfffffffffff43 == 0x10000000000000 - 0xbd; the doubled ';' is in the original
+} // closing brace with no matching opening brace
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e256m189/freeze.h b/src/Specific/solinas64_2e256m189/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e256m189/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching freeze.c notes the caller supplies uint64_t out[4])
diff --git a/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c b/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c
new file mode 100644
index 000000000..1c76aa42f
--- /dev/null
+++ b/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace after the signature, declarations used as values); it looks like generator output that failed to render - confirm and regenerate
+out[0] = uint64_t x10; // assigns a declaration rather than an expression; x10 is never given a value in this file
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "Op Syntax.SubWithGetBorrow ..." text is not C; presumably an unprinted subtract-with-borrow node of the generator (52 may be a bit width) - TODO confirm
+out[2] = x2; // stores input x2 unchanged
+out[3] = 0xfffffffffffff;; // constant equals 2^52 - 1; the doubled ';' is an empty statement
+} // only out[0..3] are written; inputs x7, x8, x6 and x4 are never read
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.h b/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e256m2e224p2e192p2e96m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching freeze.c notes the caller supplies uint64_t out[4])
diff --git a/src/Specific/solinas64_2e256m2e32m977/femul.c b/src/Specific/solinas64_2e256m2e32m977/femul.c
new file mode 100644
index 000000000..b6759718f
--- /dev/null
+++ b/src/Specific/solinas64_2e256m2e32m977/femul.c
@@ -0,0 +1,59 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // multiplies operand (x10,x11,x9,x7,x5) by operand (x18,x19,x17,x15,x13): builds five product columns x20..x24, carries between them, stores five limbs in out[0..4]
+{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13))))); // column x20: the only column without 0x3d1 / 0x100000000 terms; it is split first (below) and its low bits reach out[0]
+{ ℤ x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) +ℤ ((0x3d1 * ((uint128_t)x10 * x18)) +ℤ (0x100000000 *ℤ ((uint128_t)x10 * x18)))); // NOTE(review): `ℤ`, `+ℤ` and `*ℤ` are not C; presumably the generator found no machine type wide enough for these values - confirm before compiling
+{ ℤ x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) +ℤ ((0x3d1 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))) +ℤ (0x100000000 *ℤ (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19)))));
+{ ℤ x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) +ℤ ((0x3d1 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))) +ℤ (0x100000000 *ℤ (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17))))));
+{ ℤ x24 = (((uint128_t)x5 * x13) +ℤ ((0x3d1 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))) +ℤ (0x100000000 *ℤ ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15)))))))); // column x24: x5*x13 plus wrapped terms weighted by 0x3d1 (977) and 0x100000000 (2^32); presumably from the 2e256m2e32m977 modulus in the path - confirm
+{ uint64_t x25 = (uint64_t) (x20 >> 0x33); // 0x33 = 51: part of x20 above its low 51 bits
+{ uint64_t x26 = ((uint64_t)x20 & 0x7ffffffffffff); // low 51 bits of x20
+{ uint128_t x27 = (((uint128_t)0x8000000000000 * x25) + x26); // 0x8000000000000 = 2^51: puts the two pieces of x20 back together
+{ uint64_t x28 = (uint64_t) (x27 >> 0x33);
+{ uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff);
+{ uint128_t x30 = (((uint128_t)0x8000000000000 * x28) + x29); // same split-and-recombine repeated on x27
+{ uint64_t x31 = (uint64_t) (x30 >> 0x33); // high part, folded into the x24 column next
+{ uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff); // low 51 bits, added back at x45
+{ ℤ x33 = (x24 +ℤ (((uint128_t)0x3d1 * x31) + ((uint128_t)0x100000000 * x31))); // x31 enters column x24 with weight 0x3d1 + 0x100000000
+{ uint128_t x34 = (x33 >> 0x34); // 0x34 = 52: carry out of column x24
+{ uint64_t x35 = (x33 & 0xfffffffffffff); // low 52 bits kept for the final fold at x48
+{ ℤ x36 = (x34 +ℤ x23); // carry chain continues x23 -> x22 -> x21, 51 bits per step
+{ uint128_t x37 = (x36 >> 0x33);
+{ uint64_t x38 = (x36 & 0x7ffffffffffff);
+{ ℤ x39 = (x37 +ℤ x22);
+{ uint128_t x40 = (x39 >> 0x33);
+{ uint64_t x41 = (x39 & 0x7ffffffffffff);
+{ ℤ x42 = (x40 +ℤ x21);
+{ uint128_t x43 = (x42 >> 0x33);
+{ uint64_t x44 = (x42 & 0x7ffffffffffff);
+{ uint128_t x45 = (x43 + x32); // last carry joins the low 51 bits saved from x20
+{ uint64_t x46 = (uint64_t) (x45 >> 0x33);
+{ uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffff);
+{ uint128_t x48 = (x35 + ((0x3d1 * x46) + ((uint128_t)0x100000000 * x46))); // overflow x46 folded back into the low limb with the same two weights
+{ uint64_t x49 = (uint64_t) (x48 >> 0x34);
+{ uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffff);
+{ uint64_t x51 = (x50 >> 0x34); // x50 is already masked to 52 bits, so this shift yields 0
+{ uint64_t x52 = (x50 & 0xfffffffffffff); // same mask again; equals x50
+out[0] = x47;
+out[1] = x44;
+out[2] = x41;
+out[3] = x51 + x49 + x38; // carry x49 is added to x38 with no further masking
+out[4] = x52;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e256m2e32m977/femul.h b/src/Specific/solinas64_2e256m2e32m977/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e256m2e32m977/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // prototype: ten uint64_t inputs by value, result written through out (the matching femul.c notes the caller supplies uint64_t out[5])
diff --git a/src/Specific/solinas64_2e256m2e32m977/fesquare.c b/src/Specific/solinas64_2e256m2e32m977/fesquare.c
new file mode 100644
index 000000000..5a7485a7c
--- /dev/null
+++ b/src/Specific/solinas64_2e256m2e32m977/fesquare.c
@@ -0,0 +1,59 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // multiplies operand (x7,x8,x6,x4,x2) by itself: builds five product columns x9..x13, carries between them, stores five limbs in out[0..4]
+{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2))))); // column x9: the only column without 0x3d1 / 0x100000000 terms; it is split first (below) and its low bits reach out[0]
+{ ℤ x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) +ℤ ((0x3d1 * ((uint128_t)x7 * x7)) +ℤ (0x100000000 *ℤ ((uint128_t)x7 * x7)))); // NOTE(review): `ℤ`, `+ℤ` and `*ℤ` are not C; presumably the generator found no machine type wide enough for these values - confirm before compiling
+{ ℤ x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) +ℤ ((0x3d1 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))) +ℤ (0x100000000 *ℤ (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8)))));
+{ ℤ x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) +ℤ ((0x3d1 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))) +ℤ (0x100000000 *ℤ (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6))))));
+{ ℤ x13 = (((uint128_t)x2 * x2) +ℤ ((0x3d1 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))) +ℤ (0x100000000 *ℤ ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4)))))))); // column x13: x2*x2 plus wrapped terms weighted by 0x3d1 (977) and 0x100000000 (2^32); presumably from the 2e256m2e32m977 modulus in the path - confirm
+{ uint64_t x14 = (uint64_t) (x9 >> 0x33); // 0x33 = 51: part of x9 above its low 51 bits
+{ uint64_t x15 = ((uint64_t)x9 & 0x7ffffffffffff); // low 51 bits of x9
+{ uint128_t x16 = (((uint128_t)0x8000000000000 * x14) + x15); // 0x8000000000000 = 2^51: puts the two pieces of x9 back together
+{ uint64_t x17 = (uint64_t) (x16 >> 0x33);
+{ uint64_t x18 = ((uint64_t)x16 & 0x7ffffffffffff);
+{ uint128_t x19 = (((uint128_t)0x8000000000000 * x17) + x18); // same split-and-recombine repeated on x16
+{ uint64_t x20 = (uint64_t) (x19 >> 0x33); // high part, folded into the x13 column next
+{ uint64_t x21 = ((uint64_t)x19 & 0x7ffffffffffff); // low 51 bits, added back at x34
+{ ℤ x22 = (x13 +ℤ (((uint128_t)0x3d1 * x20) + ((uint128_t)0x100000000 * x20))); // x20 enters column x13 with weight 0x3d1 + 0x100000000
+{ uint128_t x23 = (x22 >> 0x34); // 0x34 = 52: carry out of column x13
+{ uint64_t x24 = (x22 & 0xfffffffffffff); // low 52 bits kept for the final fold at x37
+{ ℤ x25 = (x23 +ℤ x12); // carry chain continues x12 -> x11 -> x10, 51 bits per step
+{ uint128_t x26 = (x25 >> 0x33);
+{ uint64_t x27 = (x25 & 0x7ffffffffffff);
+{ ℤ x28 = (x26 +ℤ x11);
+{ uint128_t x29 = (x28 >> 0x33);
+{ uint64_t x30 = (x28 & 0x7ffffffffffff);
+{ ℤ x31 = (x29 +ℤ x10);
+{ uint128_t x32 = (x31 >> 0x33);
+{ uint64_t x33 = (x31 & 0x7ffffffffffff);
+{ uint128_t x34 = (x32 + x21); // last carry joins the low 51 bits saved from x9
+{ uint64_t x35 = (uint64_t) (x34 >> 0x33);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+{ uint128_t x37 = (x24 + ((0x3d1 * x35) + ((uint128_t)0x100000000 * x35))); // overflow x35 folded back into the low limb with the same two weights
+{ uint64_t x38 = (uint64_t) (x37 >> 0x34);
+{ uint64_t x39 = ((uint64_t)x37 & 0xfffffffffffff);
+{ uint64_t x40 = (x39 >> 0x34); // x39 is already masked to 52 bits, so this shift yields 0
+{ uint64_t x41 = (x39 & 0xfffffffffffff); // same mask again; equals x39
+out[0] = x36;
+out[1] = x33;
+out[2] = x30;
+out[3] = x40 + x38 + x27; // carry x38 is added to x27 with no further masking
+out[4] = x41;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e256m2e32m977/fesquare.h b/src/Specific/solinas64_2e256m2e32m977/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e256m2e32m977/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching fesquare.c notes the caller supplies uint64_t out[5])
diff --git a/src/Specific/solinas64_2e256m2e32m977/freeze.c b/src/Specific/solinas64_2e256m2e32m977/freeze.c
new file mode 100644
index 000000000..5c8088e2e
--- /dev/null
+++ b/src/Specific/solinas64_2e256m2e32m977/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace after the signature, declarations used as values); it looks like generator output that failed to render - confirm and regenerate
+out[0] = uint64_t x10; // assigns a declaration rather than an expression; x10 is never given a value in this file
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "Op Syntax.SubWithGetBorrow ..." text is not C; presumably an unprinted subtract-with-borrow node of the generator (52 may be a bit width) - TODO confirm
+out[2] = x2; // stores input x2 unchanged
+out[3] = 0xffffefffffc2f;; // constant equals 2^52 - 2^32 - 977 (presumably tied to the 2e256m2e32m977 modulus in the path - confirm); the doubled ';' is an empty statement
+} // only out[0..3] are written; inputs x7, x8, x6 and x4 are never read
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e256m2e32m977/freeze.h b/src/Specific/solinas64_2e256m2e32m977/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e256m2e32m977/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching freeze.c notes the caller supplies uint64_t out[4])
diff --git a/src/Specific/solinas64_2e256m88x2e240m1/freeze.c b/src/Specific/solinas64_2e256m88x2e240m1/freeze.c
new file mode 100644
index 000000000..1c76aa42f
--- /dev/null
+++ b/src/Specific/solinas64_2e256m88x2e240m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace after the signature, declarations used as values); it looks like generator output that failed to render - confirm and regenerate
+out[0] = uint64_t x10; // assigns a declaration rather than an expression; x10 is never given a value in this file
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "Op Syntax.SubWithGetBorrow ..." text is not C; presumably an unprinted subtract-with-borrow node of the generator (52 may be a bit width) - TODO confirm
+out[2] = x2; // stores input x2 unchanged
+out[3] = 0xfffffffffffff;; // constant equals 2^52 - 1; the doubled ';' is an empty statement
+} // only out[0..3] are written; inputs x7, x8, x6 and x4 are never read
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e256m88x2e240m1/freeze.h b/src/Specific/solinas64_2e256m88x2e240m1/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e256m88x2e240m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching freeze.c notes the caller supplies uint64_t out[4])
diff --git a/src/Specific/solinas64_2e266m3/femul.c b/src/Specific/solinas64_2e266m3/femul.c
new file mode 100644
index 000000000..8deb45986
--- /dev/null
+++ b/src/Specific/solinas64_2e266m3/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // multiplies operand (x10,x11,x9,x7,x5) by operand (x18,x19,x17,x15,x13): builds five product columns x20..x24, carries between them, stores five limbs in out[0..4]
+{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13))))); // column x20: last in the carry chain; its low 53 bits become out[0]
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x3 * ((uint128_t)x10 * x18))); // wrapped terms are scaled by 0x3 (presumably from the 2e266m3 modulus in the path - confirm)
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x3 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x3 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x3 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15))))))); // column x24: first in the carry chain; its low 54 bits (after the fold at x39) become out[4]
+{ uint64_t x25 = (uint64_t) (x24 >> 0x36); // 0x36 = 54: carry out of column x24, truncated to uint64_t
+{ uint64_t x26 = ((uint64_t)x24 & 0x3fffffffffffff); // low 54 bits of x24
+{ uint128_t x27 = (x25 + x23); // carry chain continues x23 -> x22 -> x21 -> x20, 53 bits (0x35) per step
+{ uint64_t x28 = (uint64_t) (x27 >> 0x35);
+{ uint64_t x29 = ((uint64_t)x27 & 0x1fffffffffffff);
+{ uint128_t x30 = (x28 + x22);
+{ uint64_t x31 = (uint64_t) (x30 >> 0x35);
+{ uint64_t x32 = ((uint64_t)x30 & 0x1fffffffffffff);
+{ uint128_t x33 = (x31 + x21);
+{ uint64_t x34 = (uint64_t) (x33 >> 0x35);
+{ uint64_t x35 = ((uint64_t)x33 & 0x1fffffffffffff);
+{ uint128_t x36 = (x34 + x20);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x35); // carry out of the last column
+{ uint64_t x38 = ((uint64_t)x36 & 0x1fffffffffffff);
+{ uint64_t x39 = (x26 + (0x3 * x37)); // that carry is multiplied by 0x3 and folded into the low 54 bits of x24, in 64-bit arithmetic
+{ uint64_t x40 = (x39 >> 0x36);
+{ uint64_t x41 = (x39 & 0x3fffffffffffff);
+{ uint64_t x42 = (x40 + x29); // one more carry step into the x23-derived limb
+{ uint64_t x43 = (x42 >> 0x35);
+{ uint64_t x44 = (x42 & 0x1fffffffffffff);
+out[0] = x38;
+out[1] = x35;
+out[2] = x43 + x32; // final carry x43 is added to x32 with no further masking
+out[3] = x44;
+out[4] = x41;
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e266m3/femul.h b/src/Specific/solinas64_2e266m3/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e266m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // prototype: ten uint64_t inputs by value, result written through out (the matching femul.c notes the caller supplies uint64_t out[5])
diff --git a/src/Specific/solinas64_2e266m3/fesquare.c b/src/Specific/solinas64_2e266m3/fesquare.c
new file mode 100644
index 000000000..d6f5c61ec
--- /dev/null
+++ b/src/Specific/solinas64_2e266m3/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // multiplies operand (x7,x8,x6,x4,x2) by itself: builds five product columns x9..x13, carries between them, stores five limbs in out[0..4]
+{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2))))); // column x9: last in the carry chain; its low 53 bits become out[0]
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x3 * ((uint128_t)x7 * x7))); // wrapped terms are scaled by 0x3 (presumably from the 2e266m3 modulus in the path - confirm)
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4))))))); // column x13: first in the carry chain; its low 54 bits (after the fold at x28) become out[4]
+{ uint64_t x14 = (uint64_t) (x13 >> 0x36); // 0x36 = 54: carry out of column x13, truncated to uint64_t
+{ uint64_t x15 = ((uint64_t)x13 & 0x3fffffffffffff); // low 54 bits of x13
+{ uint128_t x16 = (x14 + x12); // carry chain continues x12 -> x11 -> x10 -> x9, 53 bits (0x35) per step
+{ uint64_t x17 = (uint64_t) (x16 >> 0x35);
+{ uint64_t x18 = ((uint64_t)x16 & 0x1fffffffffffff);
+{ uint128_t x19 = (x17 + x11);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x35);
+{ uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
+{ uint128_t x22 = (x20 + x10);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x35);
+{ uint64_t x24 = ((uint64_t)x22 & 0x1fffffffffffff);
+{ uint128_t x25 = (x23 + x9);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x35); // carry out of the last column
+{ uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
+{ uint64_t x28 = (x15 + (0x3 * x26)); // that carry is multiplied by 0x3 and folded into the low 54 bits of x13, in 64-bit arithmetic
+{ uint64_t x29 = (x28 >> 0x36);
+{ uint64_t x30 = (x28 & 0x3fffffffffffff);
+{ uint64_t x31 = (x29 + x18); // one more carry step into the x12-derived limb
+{ uint64_t x32 = (x31 >> 0x35);
+{ uint64_t x33 = (x31 & 0x1fffffffffffff);
+out[0] = x27;
+out[1] = x24;
+out[2] = x32 + x21; // final carry x32 is added to x21 with no further masking
+out[3] = x33;
+out[4] = x30;
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e266m3/fesquare.h b/src/Specific/solinas64_2e266m3/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e266m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching fesquare.c notes the caller supplies uint64_t out[5])
diff --git a/src/Specific/solinas64_2e266m3/freeze.c b/src/Specific/solinas64_2e266m3/freeze.c
new file mode 100644
index 000000000..34a56b26e
--- /dev/null
+++ b/src/Specific/solinas64_2e266m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace after the signature, declarations used as values); it looks like generator output that failed to render - confirm and regenerate
+out[0] = uint64_t x10; // assigns a declaration rather than an expression; x10 is never given a value in this file
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 54 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "Op Syntax.SubWithGetBorrow ..." text is not C; presumably an unprinted subtract-with-borrow node of the generator (54 may be a bit width) - TODO confirm
+out[2] = x2; // stores input x2 unchanged
+out[3] = 0x3ffffffffffffd;; // constant equals 2^54 - 3 (presumably tied to the 2e266m3 modulus in the path - confirm); the doubled ';' is an empty statement
+} // only out[0..3] are written; inputs x7, x8, x6 and x4 are never read
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e266m3/freeze.h b/src/Specific/solinas64_2e266m3/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e266m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching freeze.c notes the caller supplies uint64_t out[4])
diff --git a/src/Specific/solinas64_2e285m9/femul.c b/src/Specific/solinas64_2e285m9/femul.c
new file mode 100644
index 000000000..0a43f11ee
--- /dev/null
+++ b/src/Specific/solinas64_2e285m9/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // multiplies operand (x10,x11,x9,x7,x5) by operand (x18,x19,x17,x15,x13): builds five product columns x20..x24, carries between them, stores five limbs in out[0..4]
+{ uint128_t x20 = (((uint128_t)x5 * x18) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + (((uint128_t)x11 * x15) + ((uint128_t)x10 * x13))))); // column x20: last in the carry chain; its low 57 bits become out[0]
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + (((uint128_t)x9 * x15) + ((uint128_t)x11 * x13)))) + (0x9 * ((uint128_t)x10 * x18))); // wrapped terms are scaled by 0x9 (presumably from the 2e285m9 modulus in the path - confirm)
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + (((uint128_t)x7 * x15) + ((uint128_t)x9 * x13))) + (0x9 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x9 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x9 * (((uint128_t)x7 * x18) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x10 * x15)))))); // column x24: first in the carry chain; its low 57 bits (after the fold at x39) become out[4]
+{ uint128_t x25 = (x24 >> 0x39); // 0x39 = 57: carry out of column x24, kept at 128 bits
+{ uint64_t x26 = ((uint64_t)x24 & 0x1ffffffffffffff); // low 57 bits of x24
+{ uint128_t x27 = (x25 + x23); // carry chain continues x23 -> x22 -> x21 -> x20, 57 bits per step
+{ uint128_t x28 = (x27 >> 0x39);
+{ uint64_t x29 = ((uint64_t)x27 & 0x1ffffffffffffff);
+{ uint128_t x30 = (x28 + x22);
+{ uint128_t x31 = (x30 >> 0x39);
+{ uint64_t x32 = ((uint64_t)x30 & 0x1ffffffffffffff);
+{ uint128_t x33 = (x31 + x21);
+{ uint128_t x34 = (x33 >> 0x39);
+{ uint64_t x35 = ((uint64_t)x33 & 0x1ffffffffffffff);
+{ uint128_t x36 = (x34 + x20);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x39); // carry out of the last column, truncated to uint64_t
+{ uint64_t x38 = ((uint64_t)x36 & 0x1ffffffffffffff);
+{ uint128_t x39 = (x26 + ((uint128_t)0x9 * x37)); // that carry is multiplied by 0x9 and folded into the low 57 bits of x24, in 128-bit arithmetic
+{ uint64_t x40 = (uint64_t) (x39 >> 0x39);
+{ uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
+{ uint64_t x42 = (x40 + x29); // one more carry step into the x23-derived limb
+{ uint64_t x43 = (x42 >> 0x39);
+{ uint64_t x44 = (x42 & 0x1ffffffffffffff);
+out[0] = x38;
+out[1] = x35;
+out[2] = x43 + x32; // final carry x43 is added to x32 with no further masking
+out[3] = x44;
+out[4] = x41;
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e285m9/femul.h b/src/Specific/solinas64_2e285m9/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e285m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // prototype: ten uint64_t inputs by value, result written through out (the matching femul.c notes the caller supplies uint64_t out[5])
diff --git a/src/Specific/solinas64_2e285m9/fesquare.c b/src/Specific/solinas64_2e285m9/fesquare.c
new file mode 100644
index 000000000..a15f688d5
--- /dev/null
+++ b/src/Specific/solinas64_2e285m9/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // multiplies operand (x7,x8,x6,x4,x2) by itself: builds five product columns x9..x13, carries between them, stores five limbs in out[0..4]
+{ uint128_t x9 = (((uint128_t)x2 * x7) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x7 * x2))))); // column x9: last in the carry chain; its low 57 bits become out[0]
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * ((uint128_t)x7 * x7))); // wrapped terms are scaled by 0x9 (presumably from the 2e285m9 modulus in the path - confirm)
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x9 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x9 * (((uint128_t)x4 * x7) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((uint128_t)x7 * x4)))))); // column x13: first in the carry chain; its low 57 bits (after the fold at x28) become out[4]
+{ uint128_t x14 = (x13 >> 0x39); // 0x39 = 57: carry out of column x13, kept at 128 bits
+{ uint64_t x15 = ((uint64_t)x13 & 0x1ffffffffffffff); // low 57 bits of x13
+{ uint128_t x16 = (x14 + x12); // carry chain continues x12 -> x11 -> x10 -> x9, 57 bits per step
+{ uint128_t x17 = (x16 >> 0x39);
+{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);
+{ uint128_t x19 = (x17 + x11);
+{ uint128_t x20 = (x19 >> 0x39);
+{ uint64_t x21 = ((uint64_t)x19 & 0x1ffffffffffffff);
+{ uint128_t x22 = (x20 + x10);
+{ uint128_t x23 = (x22 >> 0x39);
+{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
+{ uint128_t x25 = (x23 + x9);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x39); // carry out of the last column, truncated to uint64_t
+{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
+{ uint128_t x28 = (x15 + ((uint128_t)0x9 * x26)); // that carry is multiplied by 0x9 and folded into the low 57 bits of x13, in 128-bit arithmetic
+{ uint64_t x29 = (uint64_t) (x28 >> 0x39);
+{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
+{ uint64_t x31 = (x29 + x18); // one more carry step into the x12-derived limb
+{ uint64_t x32 = (x31 >> 0x39);
+{ uint64_t x33 = (x31 & 0x1ffffffffffffff);
+out[0] = x27;
+out[1] = x24;
+out[2] = x32 + x21; // final carry x32 is added to x21 with no further masking
+out[3] = x33;
+out[4] = x30;
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e285m9/fesquare.h b/src/Specific/solinas64_2e285m9/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e285m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching fesquare.c notes the caller supplies uint64_t out[5])
diff --git a/src/Specific/solinas64_2e285m9/freeze.c b/src/Specific/solinas64_2e285m9/freeze.c
new file mode 100644
index 000000000..aec265935
--- /dev/null
+++ b/src/Specific/solinas64_2e285m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace after the signature, declarations used as values); it looks like generator output that failed to render - confirm and regenerate
+out[0] = uint64_t x10; // assigns a declaration rather than an expression; x10 is never given a value in this file
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw "Op Syntax.SubWithGetBorrow ..." text is not C; presumably an unprinted subtract-with-borrow node of the generator (57 may be a bit width) - TODO confirm
+out[2] = x2; // stores input x2 unchanged
+out[3] = 0x1fffffffffffff7;; // constant equals 2^57 - 9 (presumably tied to the 2e285m9 modulus in the path - confirm); the doubled ';' is an empty statement
+} // only out[0..3] are written; inputs x7, x8, x6 and x4 are never read
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e285m9/freeze.h b/src/Specific/solinas64_2e285m9/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e285m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype: five uint64_t inputs by value, result written through out (the matching freeze.c notes the caller supplies uint64_t out[4])
diff --git a/src/Specific/solinas64_2e291m19/femul.c b/src/Specific/solinas64_2e291m19/femul.c
new file mode 100644
index 000000000..7b2ae8b54
--- /dev/null
+++ b/src/Specific/solinas64_2e291m19/femul.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13) // multiplies operand (x10,x11,x9,x7,x5) by operand (x18,x19,x17,x15,x13): builds five product columns x20..x24, carries between them, stores five limbs in out[0..4]
+{ uint128_t x20 = (((uint128_t)x5 * x18) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((0x2 * ((uint128_t)x11 * x15)) + ((uint128_t)x10 * x13))))); // column x20: last in the carry chain; its low 58 bits become out[0]
+{ uint128_t x21 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((0x2 * ((uint128_t)x9 * x15)) + ((uint128_t)x11 * x13)))) + (0x13 * ((uint128_t)x10 * x18))); // wrapped terms are scaled by 0x13 = 19 (presumably from the 2e291m19 modulus in the path - confirm)
+{ uint128_t x22 = ((((uint128_t)x5 * x17) + ((0x2 * ((uint128_t)x7 * x15)) + ((uint128_t)x9 * x13))) + (0x13 * (((uint128_t)x11 * x18) + ((uint128_t)x10 * x19))));
+{ uint128_t x23 = ((((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)) + (0x13 * (((uint128_t)x9 * x18) + (((uint128_t)x11 * x19) + ((uint128_t)x10 * x17)))));
+{ uint128_t x24 = (((uint128_t)x5 * x13) + (0x13 * ((0x2 * ((uint128_t)x7 * x18)) + ((0x2 * ((uint128_t)x9 * x19)) + ((0x2 * ((uint128_t)x11 * x17)) + (0x2 * ((uint128_t)x10 * x15))))))); // column x24: first in the carry chain; its low 59 bits (after the fold at x39) become out[4]
+{ uint128_t x25 = (x24 >> 0x3b); // 0x3b = 59: carry out of column x24, kept at 128 bits
+{ uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffffff); // low 59 bits of x24
+{ uint128_t x27 = (x25 + x23); // carry chain continues x23 -> x22 -> x21 -> x20, 58 bits (0x3a) per step
+{ uint128_t x28 = (x27 >> 0x3a);
+{ uint64_t x29 = ((uint64_t)x27 & 0x3ffffffffffffff);
+{ uint128_t x30 = (x28 + x22);
+{ uint128_t x31 = (x30 >> 0x3a);
+{ uint64_t x32 = ((uint64_t)x30 & 0x3ffffffffffffff);
+{ uint128_t x33 = (x31 + x21);
+{ uint128_t x34 = (x33 >> 0x3a);
+{ uint64_t x35 = ((uint64_t)x33 & 0x3ffffffffffffff);
+{ uint128_t x36 = (x34 + x20);
+{ uint128_t x37 = (x36 >> 0x3a); // carry out of the last column, kept at 128 bits
+{ uint64_t x38 = ((uint64_t)x36 & 0x3ffffffffffffff);
+{ uint128_t x39 = (x26 + (0x13 * x37)); // that carry is multiplied by 0x13 and folded into the low 59 bits of x24, in 128-bit arithmetic
+{ uint64_t x40 = (uint64_t) (x39 >> 0x3b);
+{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffffff);
+{ uint64_t x42 = (x40 + x29); // one more carry step into the x23-derived limb
+{ uint64_t x43 = (x42 >> 0x3a);
+{ uint64_t x44 = (x42 & 0x3ffffffffffffff);
+out[0] = x38;
+out[1] = x35;
+out[2] = x43 + x32; // final carry x43 is added to x32 with no further masking
+out[3] = x44;
+out[4] = x41;
+}}}}}}}}}}}}}}}}}}}}}}}}} // closes the one-scope-per-temporary nesting opened above
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e291m19/femul.h b/src/Specific/solinas64_2e291m19/femul.h
new file mode 100644
index 000000000..0e096575c
--- /dev/null
+++ b/src/Specific/solinas64_2e291m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x10, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13); // prototype: ten uint64_t inputs by value, result written through out (the matching femul.c notes the caller supplies uint64_t out[5])
diff --git a/src/Specific/solinas64_2e291m19/fesquare.c b/src/Specific/solinas64_2e291m19/fesquare.c
new file mode 100644
index 000000000..2cf8a74f3
--- /dev/null
+++ b/src/Specific/solinas64_2e291m19/fesquare.c
@@ -0,0 +1,51 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fesquare: sums pairwise products of the five inputs, runs a carry chain over the five sums and stores five words in out[0..4]. NOTE(review): path suggests squaring modulo 2^291 - 19 — confirm against the generator.
+{ uint128_t x9 = (((uint128_t)x2 * x7) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x7 * x2))))); // x9..x13: sums of products, each multiplied in 128 bits via the (uint128_t) cast; some products carry an extra factor 0x2
+{ uint128_t x10 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * ((uint128_t)x7 * x7)));
+{ uint128_t x11 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x7) + ((uint128_t)x7 * x8))));
+{ uint128_t x12 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x7) + (((uint128_t)x8 * x8) + ((uint128_t)x7 * x6)))));
+{ uint128_t x13 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x7)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + (0x2 * ((uint128_t)x7 * x4))))))); // x13 is the first sum consumed by the carry chain; 0x13 (19) scales part of x10..x13 but nothing in x9
+{ uint128_t x14 = (x13 >> 0x3b); // x13 shifted right by 0x3b (59) bits: the carry added into x12 below
+{ uint64_t x15 = ((uint64_t)x13 & 0x7ffffffffffffff); // masked low part of x13, kept for the fold at x28
+{ uint128_t x16 = (x14 + x12);
+{ uint128_t x17 = (x16 >> 0x3a); // from here on the sums are shifted by 0x3a (58) bits
+{ uint64_t x18 = ((uint64_t)x16 & 0x3ffffffffffffff);
+{ uint128_t x19 = (x17 + x11);
+{ uint128_t x20 = (x19 >> 0x3a);
+{ uint64_t x21 = ((uint64_t)x19 & 0x3ffffffffffffff);
+{ uint128_t x22 = (x20 + x10);
+{ uint128_t x23 = (x22 >> 0x3a);
+{ uint64_t x24 = ((uint64_t)x22 & 0x3ffffffffffffff);
+{ uint128_t x25 = (x23 + x9);
+{ uint128_t x26 = (x25 >> 0x3a);
+{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
+{ uint128_t x28 = (x15 + (0x13 * x26)); // last carry x26, multiplied by 0x13, is added back onto x15
+{ uint64_t x29 = (uint64_t) (x28 >> 0x3b);
+{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
+{ uint64_t x31 = (x29 + x18); // carry out of x28 is pushed one step further, into x18
+{ uint64_t x32 = (x31 >> 0x3a);
+{ uint64_t x33 = (x31 & 0x3ffffffffffffff);
+out[0] = x27; // masked value of the x9-based sum
+out[1] = x24;
+out[2] = x32 + x21; // final carry x32 is added to x21 and stored without another mask
+out[3] = x33;
+out[4] = x30; // masked value of the x13-based sum after the fold
+}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[5];
diff --git a/src/Specific/solinas64_2e291m19/fesquare.h b/src/Specific/solinas64_2e291m19/fesquare.h
new file mode 100644
index 000000000..2cfd2d5a8
--- /dev/null
+++ b/src/Specific/solinas64_2e291m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by five uint64_t operands; the definition in fesquare.c writes out[0..4].
diff --git a/src/Specific/solinas64_2e291m19/freeze.c b/src/Specific/solinas64_2e291m19/freeze.c
new file mode 100644
index 000000000..14406c6fe
--- /dev/null
+++ b/src/Specific/solinas64_2e291m19/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' appears below; the body is not valid C as written.
+out[0] = uint64_t x10; // NOTE(review): a declaration used as an expression; x10 is not a parameter and is never assigned in this block.
+out[1] = uint8_t x11 = Op Syntax.SubWithGetBorrow 59 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): 'Op Syntax.SubWithGetBorrow ...' is not C; looks like a generator term that was never rendered — confirm upstream.
+out[2] = x2;
+out[3] = 0x7ffffffffffffed;; // NOTE(review): stray second ';'; the constant looks like 2^59 - 19 — confirm.
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e291m19/freeze.h b/src/Specific/solinas64_2e291m19/freeze.h
new file mode 100644
index 000000000..d296b2d57
--- /dev/null
+++ b/src/Specific/solinas64_2e291m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x7, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by five uint64_t operands. NOTE(review): the matching body in freeze.c is not valid C.
diff --git a/src/Specific/solinas64_2e321m9/femul.c b/src/Specific/solinas64_2e321m9/femul.c
new file mode 100644
index 000000000..ac91fec15
--- /dev/null
+++ b/src/Specific/solinas64_2e321m9/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // femul: every product pairs one of x12,x13,x11,x9,x7,x5 with one of x22,x23,x21,x19,x17,x15; six sums are carried and stored in out[0..5]. NOTE(review): path suggests multiplication modulo 2^321 - 9 — confirm against the generator.
+{ uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15)))))); // x24..x29: sums of products, each multiplied in 128 bits via the (uint128_t) cast
+{ uint128_t x25 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + (((uint128_t)x9 * x19) + ((0x2 * ((uint128_t)x11 * x17)) + ((uint128_t)x13 * x15))))) + (0x9 * (0x2 * ((uint128_t)x12 * x22))));
+{ uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x9 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
+{ uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0x9 * ((0x2 * ((uint128_t)x11 * x22)) + (((uint128_t)x13 * x23) + (0x2 * ((uint128_t)x12 * x21))))));
+{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x9 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+{ uint128_t x29 = (((uint128_t)x5 * x15) + (0x9 * ((0x2 * ((uint128_t)x7 * x22)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + (((uint128_t)x13 * x19) + (0x2 * ((uint128_t)x12 * x17)))))))); // x29 is the first sum consumed by the carry chain; 0x9 scales part of x25..x29 but nothing in x24
+{ uint64_t x30 = (uint64_t) (x29 >> 0x36); // carry out of x29 (shift by 0x36 = 54), narrowed to uint64_t
+{ uint64_t x31 = ((uint64_t)x29 & 0x3fffffffffffff); // masked low part of x29, kept for the fold at x47
+{ uint128_t x32 = (x30 + x28);
+{ uint64_t x33 = (uint64_t) (x32 >> 0x35); // shifts alternate between 0x36 (54) and 0x35 (53) along the chain
+{ uint64_t x34 = ((uint64_t)x32 & 0x1fffffffffffff);
+{ uint128_t x35 = (x33 + x27);
+{ uint64_t x36 = (uint64_t) (x35 >> 0x36);
+{ uint64_t x37 = ((uint64_t)x35 & 0x3fffffffffffff);
+{ uint128_t x38 = (x36 + x26);
+{ uint64_t x39 = (uint64_t) (x38 >> 0x35);
+{ uint64_t x40 = ((uint64_t)x38 & 0x1fffffffffffff);
+{ uint128_t x41 = (x39 + x25);
+{ uint64_t x42 = (uint64_t) (x41 >> 0x36);
+{ uint64_t x43 = ((uint64_t)x41 & 0x3fffffffffffff);
+{ uint128_t x44 = (x42 + x24);
+{ uint64_t x45 = (uint64_t) (x44 >> 0x35);
+{ uint64_t x46 = ((uint64_t)x44 & 0x1fffffffffffff);
+{ uint64_t x47 = (x31 + (0x9 * x45)); // last carry x45, multiplied by 0x9, is added back onto x31 in 64-bit arithmetic
+{ uint64_t x48 = (x47 >> 0x36);
+{ uint64_t x49 = (x47 & 0x3fffffffffffff);
+{ uint64_t x50 = (x48 + x34); // carry out of x47 is pushed one step further, into x34
+{ uint64_t x51 = (x50 >> 0x35);
+{ uint64_t x52 = (x50 & 0x1fffffffffffff);
+out[0] = x46; // masked value of the x24-based sum
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37; // final carry x51 is added to x37 and stored without another mask
+out[4] = x52;
+out[5] = x49; // masked value of the x29-based sum after the fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas64_2e321m9/femul.h b/src/Specific/solinas64_2e321m9/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas64_2e321m9/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration only: output pointer followed by twelve uint64_t operands; the definition in femul.c writes out[0..5].
diff --git a/src/Specific/solinas64_2e321m9/fesquare.c b/src/Specific/solinas64_2e321m9/fesquare.c
new file mode 100644
index 000000000..8312f2988
--- /dev/null
+++ b/src/Specific/solinas64_2e321m9/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fesquare: sums pairwise products of the six inputs, runs a carry chain over the six sums and stores six words in out[0..5]. NOTE(review): path suggests squaring modulo 2^321 - 9 — confirm against the generator.
+{ uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2)))))); // x11..x16: sums of products, each multiplied in 128 bits via the (uint128_t) cast
+{ uint128_t x12 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x9 * (0x2 * ((uint128_t)x9 * x9))));
+{ uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x9 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
+{ uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x9 * ((0x2 * ((uint128_t)x8 * x9)) + (((uint128_t)x10 * x10) + (0x2 * ((uint128_t)x9 * x8))))));
+{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x9 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+{ uint128_t x16 = (((uint128_t)x2 * x2) + (0x9 * ((0x2 * ((uint128_t)x4 * x9)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + (0x2 * ((uint128_t)x9 * x4)))))))); // x16 is the first sum consumed by the carry chain; 0x9 scales part of x12..x16 but nothing in x11
+{ uint64_t x17 = (uint64_t) (x16 >> 0x36); // carry out of x16 (shift by 0x36 = 54), narrowed to uint64_t
+{ uint64_t x18 = ((uint64_t)x16 & 0x3fffffffffffff); // masked low part of x16, kept for the fold at x34
+{ uint128_t x19 = (x17 + x15);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x35); // shifts alternate between 0x36 (54) and 0x35 (53) along the chain
+{ uint64_t x21 = ((uint64_t)x19 & 0x1fffffffffffff);
+{ uint128_t x22 = (x20 + x14);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x36);
+{ uint64_t x24 = ((uint64_t)x22 & 0x3fffffffffffff);
+{ uint128_t x25 = (x23 + x13);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x35);
+{ uint64_t x27 = ((uint64_t)x25 & 0x1fffffffffffff);
+{ uint128_t x28 = (x26 + x12);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x36);
+{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
+{ uint128_t x31 = (x29 + x11);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x35);
+{ uint64_t x33 = ((uint64_t)x31 & 0x1fffffffffffff);
+{ uint64_t x34 = (x18 + (0x9 * x32)); // last carry x32, multiplied by 0x9, is added back onto x18 in 64-bit arithmetic
+{ uint64_t x35 = (x34 >> 0x36);
+{ uint64_t x36 = (x34 & 0x3fffffffffffff);
+{ uint64_t x37 = (x35 + x21); // carry out of x34 is pushed one step further, into x21
+{ uint64_t x38 = (x37 >> 0x35);
+{ uint64_t x39 = (x37 & 0x1fffffffffffff);
+out[0] = x33; // masked value of the x11-based sum
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // final carry x38 is added to x24 and stored without another mask
+out[4] = x39;
+out[5] = x36; // masked value of the x16-based sum after the fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas64_2e321m9/fesquare.h b/src/Specific/solinas64_2e321m9/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas64_2e321m9/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by six uint64_t operands; the definition in fesquare.c writes out[0..5].
diff --git a/src/Specific/solinas64_2e321m9/freeze.c b/src/Specific/solinas64_2e321m9/freeze.c
new file mode 100644
index 000000000..c20a7e799
--- /dev/null
+++ b/src/Specific/solinas64_2e321m9/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' appears below; the body is not valid C as written.
+out[0] = uint64_t x12; // NOTE(review): a declaration used as an expression; x12 is not a parameter and is never assigned in this block.
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 54 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): 'Op Syntax.SubWithGetBorrow ...' is not C; looks like a generator term that was never rendered — confirm upstream.
+out[2] = x2;
+out[3] = 0x3ffffffffffff7;; // NOTE(review): stray second ';'; the constant looks like 2^54 - 9 — confirm.
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e321m9/freeze.h b/src/Specific/solinas64_2e321m9/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas64_2e321m9/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by six uint64_t operands. NOTE(review): the matching body in freeze.c is not valid C.
diff --git a/src/Specific/solinas64_2e322m2e161m1/freeze.c b/src/Specific/solinas64_2e322m2e161m1/freeze.c
new file mode 100644
index 000000000..71d073624
--- /dev/null
+++ b/src/Specific/solinas64_2e322m2e161m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' appears below; the body is not valid C as written.
+out[0] = uint64_t x14; // NOTE(review): a declaration used as an expression; x14 is not a parameter and is never assigned in this block.
+out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 46 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): 'Op Syntax.SubWithGetBorrow ...' is not C; looks like a generator term that was never rendered — confirm upstream.
+out[2] = x2;
+out[3] = 0x3fffffffffff;; // NOTE(review): stray second ';'; the constant looks like 2^46 - 1 — confirm.
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e322m2e161m1/freeze.h b/src/Specific/solinas64_2e322m2e161m1/freeze.h
new file mode 100644
index 000000000..b2c28ccf1
--- /dev/null
+++ b/src/Specific/solinas64_2e322m2e161m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by seven uint64_t operands. NOTE(review): the matching body in freeze.c is not valid C.
diff --git a/src/Specific/solinas64_2e336m17/femul.c b/src/Specific/solinas64_2e336m17/femul.c
new file mode 100644
index 000000000..d1bdecab7
--- /dev/null
+++ b/src/Specific/solinas64_2e336m17/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // femul: every product pairs one of x12,x13,x11,x9,x7,x5 with one of x22,x23,x21,x19,x17,x15; six sums are carried and stored in out[0..5]. NOTE(review): path suggests multiplication modulo 2^336 - 17 — confirm against the generator.
+{ uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15)))))); // x24..x29: sums of products, each multiplied in 128 bits via the (uint128_t) cast
+{ uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0x11 * ((uint128_t)x12 * x22)));
+{ uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x11 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
+{ uint128_t x27 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + ((uint128_t)x9 * x15))) + (0x11 * (((uint128_t)x11 * x22) + (((uint128_t)x13 * x23) + ((uint128_t)x12 * x21)))));
+{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x11 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+{ uint128_t x29 = (((uint128_t)x5 * x15) + (0x11 * (((uint128_t)x7 * x22) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x12 * x17))))))); // x29 is the first sum consumed by the carry chain; 0x11 (17) scales part of x25..x29 but nothing in x24
+{ uint128_t x30 = (x29 >> 0x38); // carry out of x29 (shift by 0x38 = 56); the first four carries stay 128-bit
+{ uint64_t x31 = ((uint64_t)x29 & 0xffffffffffffff); // masked low part of x29, kept for the fold at x47
+{ uint128_t x32 = (x30 + x28);
+{ uint128_t x33 = (x32 >> 0x38);
+{ uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
+{ uint128_t x35 = (x33 + x27);
+{ uint128_t x36 = (x35 >> 0x38);
+{ uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
+{ uint128_t x38 = (x36 + x26);
+{ uint128_t x39 = (x38 >> 0x38);
+{ uint64_t x40 = ((uint64_t)x38 & 0xffffffffffffff);
+{ uint128_t x41 = (x39 + x25);
+{ uint64_t x42 = (uint64_t) (x41 >> 0x38); // from here the carries are narrowed to uint64_t
+{ uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
+{ uint128_t x44 = (x42 + x24);
+{ uint64_t x45 = (uint64_t) (x44 >> 0x38);
+{ uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
+{ uint128_t x47 = (x31 + ((uint128_t)0x11 * x45)); // last carry x45, multiplied by 0x11 in 128 bits, is added back onto x31
+{ uint64_t x48 = (uint64_t) (x47 >> 0x38);
+{ uint64_t x49 = ((uint64_t)x47 & 0xffffffffffffff);
+{ uint64_t x50 = (x48 + x34); // carry out of x47 is pushed one step further, into x34
+{ uint64_t x51 = (x50 >> 0x38);
+{ uint64_t x52 = (x50 & 0xffffffffffffff);
+out[0] = x46; // masked value of the x24-based sum
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37; // final carry x51 is added to x37 and stored without another mask
+out[4] = x52;
+out[5] = x49; // masked value of the x29-based sum after the fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas64_2e336m17/femul.h b/src/Specific/solinas64_2e336m17/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas64_2e336m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration only: output pointer followed by twelve uint64_t operands; the definition in femul.c writes out[0..5].
diff --git a/src/Specific/solinas64_2e336m17/fesquare.c b/src/Specific/solinas64_2e336m17/fesquare.c
new file mode 100644
index 000000000..7f903cd78
--- /dev/null
+++ b/src/Specific/solinas64_2e336m17/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fesquare: sums pairwise products of the six inputs, runs a carry chain over the six sums and stores six words in out[0..5]. NOTE(review): path suggests squaring modulo 2^336 - 17 — confirm against the generator.
+{ uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2)))))); // x11..x16: sums of products, each multiplied in 128 bits via the (uint128_t) cast
+{ uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x11 * ((uint128_t)x9 * x9)));
+{ uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
+{ uint128_t x14 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)x8 * x9) + (((uint128_t)x10 * x10) + ((uint128_t)x9 * x8)))));
+{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+{ uint128_t x16 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x9) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + ((uint128_t)x9 * x4))))))); // x16 is the first sum consumed by the carry chain; 0x11 (17) scales part of x12..x16 but nothing in x11
+{ uint128_t x17 = (x16 >> 0x38); // carry out of x16 (shift by 0x38 = 56); the first four carries stay 128-bit
+{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff); // masked low part of x16, kept for the fold at x34
+{ uint128_t x19 = (x17 + x15);
+{ uint128_t x20 = (x19 >> 0x38);
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+{ uint128_t x22 = (x20 + x14);
+{ uint128_t x23 = (x22 >> 0x38);
+{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+{ uint128_t x25 = (x23 + x13);
+{ uint128_t x26 = (x25 >> 0x38);
+{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
+{ uint128_t x28 = (x26 + x12);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x38); // from here the carries are narrowed to uint64_t
+{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+{ uint128_t x31 = (x29 + x11);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x38);
+{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+{ uint128_t x34 = (x18 + ((uint128_t)0x11 * x32)); // last carry x32, multiplied by 0x11 in 128 bits, is added back onto x18
+{ uint64_t x35 = (uint64_t) (x34 >> 0x38);
+{ uint64_t x36 = ((uint64_t)x34 & 0xffffffffffffff);
+{ uint64_t x37 = (x35 + x21); // carry out of x34 is pushed one step further, into x21
+{ uint64_t x38 = (x37 >> 0x38);
+{ uint64_t x39 = (x37 & 0xffffffffffffff);
+out[0] = x33; // masked value of the x11-based sum
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // final carry x38 is added to x24 and stored without another mask
+out[4] = x39;
+out[5] = x36; // masked value of the x16-based sum after the fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas64_2e336m17/fesquare.h b/src/Specific/solinas64_2e336m17/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas64_2e336m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by six uint64_t operands; the definition in fesquare.c writes out[0..5].
diff --git a/src/Specific/solinas64_2e336m17/freeze.c b/src/Specific/solinas64_2e336m17/freeze.c
new file mode 100644
index 000000000..7f9c2afb6
--- /dev/null
+++ b/src/Specific/solinas64_2e336m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' appears below; the body is not valid C as written.
+out[0] = uint64_t x12; // NOTE(review): a declaration used as an expression; x12 is not a parameter and is never assigned in this block.
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): 'Op Syntax.SubWithGetBorrow ...' is not C; looks like a generator term that was never rendered — confirm upstream.
+out[2] = x2;
+out[3] = 0xffffffffffffef;; // NOTE(review): stray second ';'; the constant looks like 2^56 - 17 — confirm.
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e336m17/freeze.h b/src/Specific/solinas64_2e336m17/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas64_2e336m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by six uint64_t operands. NOTE(review): the matching body in freeze.c is not valid C.
diff --git a/src/Specific/solinas64_2e336m3/femul.c b/src/Specific/solinas64_2e336m3/femul.c
new file mode 100644
index 000000000..f0bc0c880
--- /dev/null
+++ b/src/Specific/solinas64_2e336m3/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15) // femul: every product pairs one of x12,x13,x11,x9,x7,x5 with one of x22,x23,x21,x19,x17,x15; six sums are carried and stored in out[0..5]. NOTE(review): path suggests multiplication modulo 2^336 - 3 — confirm against the generator.
+{ uint128_t x24 = (((uint128_t)x5 * x22) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + (((uint128_t)x13 * x17) + ((uint128_t)x12 * x15)))))); // x24..x29: sums of products, each multiplied in 128 bits via the (uint128_t) cast
+{ uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0x3 * ((uint128_t)x12 * x22)));
+{ uint128_t x26 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + (((uint128_t)x9 * x17) + ((uint128_t)x11 * x15)))) + (0x3 * (((uint128_t)x13 * x22) + ((uint128_t)x12 * x23))));
+{ uint128_t x27 = ((((uint128_t)x5 * x19) + (((uint128_t)x7 * x17) + ((uint128_t)x9 * x15))) + (0x3 * (((uint128_t)x11 * x22) + (((uint128_t)x13 * x23) + ((uint128_t)x12 * x21)))));
+{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0x3 * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+{ uint128_t x29 = (((uint128_t)x5 * x15) + (0x3 * (((uint128_t)x7 * x22) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x12 * x17))))))); // x29 is the first sum consumed by the carry chain; 0x3 scales part of x25..x29 but nothing in x24
+{ uint64_t x30 = (uint64_t) (x29 >> 0x38); // carry out of x29 (shift by 0x38 = 56), narrowed to uint64_t like every later carry
+{ uint64_t x31 = ((uint64_t)x29 & 0xffffffffffffff); // masked low part of x29, kept for the fold at x47
+{ uint128_t x32 = (x30 + x28);
+{ uint64_t x33 = (uint64_t) (x32 >> 0x38);
+{ uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
+{ uint128_t x35 = (x33 + x27);
+{ uint64_t x36 = (uint64_t) (x35 >> 0x38);
+{ uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
+{ uint128_t x38 = (x36 + x26);
+{ uint64_t x39 = (uint64_t) (x38 >> 0x38);
+{ uint64_t x40 = ((uint64_t)x38 & 0xffffffffffffff);
+{ uint128_t x41 = (x39 + x25);
+{ uint64_t x42 = (uint64_t) (x41 >> 0x38);
+{ uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
+{ uint128_t x44 = (x42 + x24);
+{ uint64_t x45 = (uint64_t) (x44 >> 0x38);
+{ uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
+{ uint64_t x47 = (x31 + (0x3 * x45)); // last carry x45, multiplied by 0x3, is added back onto x31 in 64-bit arithmetic
+{ uint64_t x48 = (x47 >> 0x38);
+{ uint64_t x49 = (x47 & 0xffffffffffffff);
+{ uint64_t x50 = (x48 + x34); // carry out of x47 is pushed one step further, into x34
+{ uint64_t x51 = (x50 >> 0x38);
+{ uint64_t x52 = (x50 & 0xffffffffffffff);
+out[0] = x46; // masked value of the x24-based sum
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37; // final carry x51 is added to x37 and stored without another mask
+out[4] = x52;
+out[5] = x49; // masked value of the x29-based sum after the fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas64_2e336m3/femul.h b/src/Specific/solinas64_2e336m3/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas64_2e336m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15); // Declaration only: output pointer followed by twelve uint64_t operands; the definition in femul.c writes out[0..5].
diff --git a/src/Specific/solinas64_2e336m3/fesquare.c b/src/Specific/solinas64_2e336m3/fesquare.c
new file mode 100644
index 000000000..2a515d77c
--- /dev/null
+++ b/src/Specific/solinas64_2e336m3/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fesquare: sums pairwise products of the six inputs, runs a carry chain over the six sums and stores six words in out[0..5]. NOTE(review): path suggests squaring modulo 2^336 - 3 — confirm against the generator.
+{ uint128_t x11 = (((uint128_t)x2 * x9) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x9 * x2)))))); // x11..x16: sums of products, each multiplied in 128 bits via the (uint128_t) cast
+{ uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x3 * ((uint128_t)x9 * x9)));
+{ uint128_t x13 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x3 * (((uint128_t)x10 * x9) + ((uint128_t)x9 * x10))));
+{ uint128_t x14 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x3 * (((uint128_t)x8 * x9) + (((uint128_t)x10 * x10) + ((uint128_t)x9 * x8)))));
+{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+{ uint128_t x16 = (((uint128_t)x2 * x2) + (0x3 * (((uint128_t)x4 * x9) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + ((uint128_t)x9 * x4))))))); // x16 is the first sum consumed by the carry chain; 0x3 scales part of x12..x16 but nothing in x11
+{ uint64_t x17 = (uint64_t) (x16 >> 0x38); // carry out of x16 (shift by 0x38 = 56), narrowed to uint64_t like every later carry
+{ uint64_t x18 = ((uint64_t)x16 & 0xffffffffffffff); // masked low part of x16, kept for the fold at x34
+{ uint128_t x19 = (x17 + x15);
+{ uint64_t x20 = (uint64_t) (x19 >> 0x38);
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+{ uint128_t x22 = (x20 + x14);
+{ uint64_t x23 = (uint64_t) (x22 >> 0x38);
+{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+{ uint128_t x25 = (x23 + x13);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x38);
+{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
+{ uint128_t x28 = (x26 + x12);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x38);
+{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+{ uint128_t x31 = (x29 + x11);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x38);
+{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+{ uint64_t x34 = (x18 + (0x3 * x32)); // last carry x32, multiplied by 0x3, is added back onto x18 in 64-bit arithmetic
+{ uint64_t x35 = (x34 >> 0x38);
+{ uint64_t x36 = (x34 & 0xffffffffffffff);
+{ uint64_t x37 = (x35 + x21); // carry out of x34 is pushed one step further, into x21
+{ uint64_t x38 = (x37 >> 0x38);
+{ uint64_t x39 = (x37 & 0xffffffffffffff);
+out[0] = x33; // masked value of the x11-based sum
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24; // final carry x38 is added to x24 and stored without another mask
+out[4] = x39;
+out[5] = x36; // masked value of the x16-based sum after the fold
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6];
diff --git a/src/Specific/solinas64_2e336m3/fesquare.h b/src/Specific/solinas64_2e336m3/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas64_2e336m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by six uint64_t operands; the definition in fesquare.c writes out[0..5].
diff --git a/src/Specific/solinas64_2e336m3/freeze.c b/src/Specific/solinas64_2e336m3/freeze.c
new file mode 100644
index 000000000..000e7a80c
--- /dev/null
+++ b/src/Specific/solinas64_2e336m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature although a closing '}' appears below; the body is not valid C as written.
+out[0] = uint64_t x12; // NOTE(review): a declaration used as an expression; x12 is not a parameter and is never assigned in this block.
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): 'Op Syntax.SubWithGetBorrow ...' is not C; looks like a generator term that was never rendered — confirm upstream.
+out[2] = x2;
+out[3] = 0xfffffffffffffd;; // NOTE(review): stray second ';'; the constant looks like 2^56 - 3 — confirm.
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e336m3/freeze.h b/src/Specific/solinas64_2e336m3/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas64_2e336m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration only: output pointer followed by six uint64_t operands. NOTE(review): the matching body in freeze.c is not valid C.
diff --git a/src/Specific/solinas64_2e338m15/femul.c b/src/Specific/solinas64_2e338m15/femul.c
new file mode 100644
index 000000000..91d8d237e
--- /dev/null
+++ b/src/Specific/solinas64_2e338m15/femul.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15)  // Forms all 36 pairwise products between the first six and the last six uint64_t arguments in 128-bit arithmetic, carries between the six resulting columns, and stores six words in out[0..5]. NOTE(review): the 0xf factors and the 57/56-bit shifts look like reduction modulo 2^338 - 15 (matching the directory name) — confirm against the generator.
+{ uint128_t x24 = (((uint128_t)x5 * x22) + ((0x2 * ((uint128_t)x7 * x23)) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + ((0x2 * ((uint128_t)x13 * x17)) + ((uint128_t)x12 * x15))))));  // only column with no 0xf-scaled products; feeds x44, whose low 56 bits become out[0]
+{ uint128_t x25 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + (((uint128_t)x11 * x17) + ((uint128_t)x13 * x15))))) + (0xf * ((uint128_t)x12 * x22)));
+{ uint128_t x26 = ((((uint128_t)x5 * x21) + ((0x2 * ((uint128_t)x7 * x19)) + ((0x2 * ((uint128_t)x9 * x17)) + ((uint128_t)x11 * x15)))) + (0xf * ((0x2 * ((uint128_t)x13 * x22)) + (0x2 * ((uint128_t)x12 * x23)))));
+{ uint128_t x27 = ((((uint128_t)x5 * x19) + ((0x2 * ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15))) + (0xf * (((uint128_t)x11 * x22) + ((0x2 * ((uint128_t)x13 * x23)) + ((uint128_t)x12 * x21)))));
+{ uint128_t x28 = ((((uint128_t)x5 * x17) + ((uint128_t)x7 * x15)) + (0xf * (((uint128_t)x9 * x22) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x12 * x19))))));
+{ uint128_t x29 = (((uint128_t)x5 * x15) + (0xf * ((0x2 * ((uint128_t)x7 * x22)) + ((0x2 * ((uint128_t)x9 * x23)) + (((uint128_t)x11 * x21) + ((0x2 * ((uint128_t)x13 * x19)) + (0x2 * ((uint128_t)x12 * x17))))))));  // column holding x5*x15; the carry chain below starts here
+{ uint128_t x30 = (x29 >> 0x39);  // carry: bits of x29 above the low 57
+{ uint64_t x31 = ((uint64_t)x29 & 0x1ffffffffffffff);  // low 57 bits of x29
+{ uint128_t x32 = (x30 + x28);  // carry added into the next column; the same shift/mask/add pattern repeats through x44
+{ uint128_t x33 = (x32 >> 0x38);
+{ uint64_t x34 = ((uint64_t)x32 & 0xffffffffffffff);
+{ uint128_t x35 = (x33 + x27);
+{ uint128_t x36 = (x35 >> 0x38);
+{ uint64_t x37 = ((uint64_t)x35 & 0xffffffffffffff);
+{ uint128_t x38 = (x36 + x26);
+{ uint128_t x39 = (x38 >> 0x39);
+{ uint64_t x40 = ((uint64_t)x38 & 0x1ffffffffffffff);
+{ uint128_t x41 = (x39 + x25);
+{ uint128_t x42 = (x41 >> 0x38);
+{ uint64_t x43 = ((uint64_t)x41 & 0xffffffffffffff);
+{ uint128_t x44 = (x42 + x24);
+{ uint64_t x45 = (uint64_t) (x44 >> 0x38);  // carry out of the last column, truncated to 64 bits
+{ uint64_t x46 = ((uint64_t)x44 & 0xffffffffffffff);
+{ uint128_t x47 = (x31 + ((uint128_t)0xf * x45));  // last carry x45 is scaled by 0xf and added to the first piece x31
+{ uint64_t x48 = (uint64_t) (x47 >> 0x39);
+{ uint64_t x49 = ((uint64_t)x47 & 0x1ffffffffffffff);
+{ uint64_t x50 = (x48 + x34);  // second, shorter carry pass done in 64-bit arithmetic
+{ uint64_t x51 = (x50 >> 0x38);
+{ uint64_t x52 = (x50 & 0xffffffffffffff);
+out[0] = x46;
+out[1] = x43;
+out[2] = x40;
+out[3] = x51 + x37;  // last carry x51 is added here without further masking
+out[4] = x52;
+out[5] = x49;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; all six elements (out[0]..out[5]) are written
diff --git a/src/Specific/solinas64_2e338m15/femul.h b/src/Specific/solinas64_2e338m15/femul.h
new file mode 100644
index 000000000..8ce27823e
--- /dev/null
+++ b/src/Specific/solinas64_2e338m15/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x12, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x22, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15);  // Prototype only: takes a uint64_t pointer `out` plus twelve uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e338m15/fesquare.c b/src/Specific/solinas64_2e338m15/fesquare.c
new file mode 100644
index 000000000..67017c61d
--- /dev/null
+++ b/src/Specific/solinas64_2e338m15/fesquare.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // Forms 128-bit products in which both factors come from the same six uint64_t arguments, carries between the six resulting columns, and stores six words in out[0..5]. NOTE(review): the 0xf factors and the 57/56-bit shifts look like reduction modulo 2^338 - 15 (matching the directory name) — confirm against the generator.
+{ uint128_t x11 = (((uint128_t)x2 * x9) + ((0x2 * ((uint128_t)x4 * x10)) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x9 * x2))))));  // only column with no 0xf-scaled products; symmetric terms such as x2*x9 and x9*x2 are written out separately rather than merged
+{ uint128_t x12 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0xf * ((uint128_t)x9 * x9)));
+{ uint128_t x13 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xf * ((0x2 * ((uint128_t)x10 * x9)) + (0x2 * ((uint128_t)x9 * x10)))));
+{ uint128_t x14 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xf * (((uint128_t)x8 * x9) + ((0x2 * ((uint128_t)x10 * x10)) + ((uint128_t)x9 * x8)))));
+{ uint128_t x15 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xf * (((uint128_t)x6 * x9) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + ((uint128_t)x9 * x6))))));
+{ uint128_t x16 = (((uint128_t)x2 * x2) + (0xf * ((0x2 * ((uint128_t)x4 * x9)) + ((0x2 * ((uint128_t)x6 * x10)) + (((uint128_t)x8 * x8) + ((0x2 * ((uint128_t)x10 * x6)) + (0x2 * ((uint128_t)x9 * x4))))))));  // column holding x2*x2; the carry chain below starts here
+{ uint128_t x17 = (x16 >> 0x39);  // carry: bits of x16 above the low 57
+{ uint64_t x18 = ((uint64_t)x16 & 0x1ffffffffffffff);  // low 57 bits of x16
+{ uint128_t x19 = (x17 + x15);  // carry added into the next column; the same shift/mask/add pattern repeats through x31
+{ uint128_t x20 = (x19 >> 0x38);
+{ uint64_t x21 = ((uint64_t)x19 & 0xffffffffffffff);
+{ uint128_t x22 = (x20 + x14);
+{ uint128_t x23 = (x22 >> 0x38);
+{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+{ uint128_t x25 = (x23 + x13);
+{ uint128_t x26 = (x25 >> 0x39);
+{ uint64_t x27 = ((uint64_t)x25 & 0x1ffffffffffffff);
+{ uint128_t x28 = (x26 + x12);
+{ uint128_t x29 = (x28 >> 0x38);
+{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+{ uint128_t x31 = (x29 + x11);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x38);  // carry out of the last column, truncated to 64 bits
+{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+{ uint128_t x34 = (x18 + ((uint128_t)0xf * x32));  // last carry x32 is scaled by 0xf and added to the first piece x18
+{ uint64_t x35 = (uint64_t) (x34 >> 0x39);
+{ uint64_t x36 = ((uint64_t)x34 & 0x1ffffffffffffff);
+{ uint64_t x37 = (x35 + x21);  // second, shorter carry pass done in 64-bit arithmetic
+{ uint64_t x38 = (x37 >> 0x38);
+{ uint64_t x39 = (x37 & 0xffffffffffffff);
+out[0] = x33;
+out[1] = x30;
+out[2] = x27;
+out[3] = x38 + x24;  // last carry x38 is added here without further masking
+out[4] = x39;
+out[5] = x36;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[6]; all six elements (out[0]..out[5]) are written
diff --git a/src/Specific/solinas64_2e338m15/fesquare.h b/src/Specific/solinas64_2e338m15/fesquare.h
new file mode 100644
index 000000000..5ba46828c
--- /dev/null
+++ b/src/Specific/solinas64_2e338m15/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype only: takes a uint64_t pointer `out` plus six uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e338m15/freeze.c b/src/Specific/solinas64_2e338m15/freeze.c
new file mode 100644
index 000000000..4f7156067
--- /dev/null
+++ b/src/Specific/solinas64_2e338m15/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): not compilable as written — no '{' opens the body and the lines below contain non-C text; this looks like unrendered generator output and should be regenerated.
+out[0] = uint64_t x12;  // 'uint64_t x12' is a declaration, not an expression, so this assignment is invalid C
+out[1] = uint8_t x13 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;  // 'Op Syntax.SubWithGetBorrow ...' is not C syntax
+out[2] = x2;
+out[3] = 0x1fffffffffffff1;;  // stray second ';' (an empty statement)
+}
+// caller: uint64_t out[4]; NOTE(review): this signature takes six input values, so a four-element result looks inconsistent — verify after regenerating
diff --git a/src/Specific/solinas64_2e338m15/freeze.h b/src/Specific/solinas64_2e338m15/freeze.h
new file mode 100644
index 000000000..e1bbb5273
--- /dev/null
+++ b/src/Specific/solinas64_2e338m15/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x9, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype only: takes a uint64_t pointer `out` plus six uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e369m25/femul.c b/src/Specific/solinas64_2e369m25/femul.c
new file mode 100644
index 000000000..d58b08e29
--- /dev/null
+++ b/src/Specific/solinas64_2e369m25/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19)  // Forms all 64 pairwise products between the first eight and the last eight uint64_t arguments in 128-bit arithmetic, carries between the eight resulting columns, and stores eight words in out[0..7]. NOTE(review): the 0x19 factors and the 47/46-bit shifts look like reduction modulo 2^369 - 25 (matching the directory name) — confirm against the generator.
+{ uint128_t x32 = (((uint128_t)x5 * x30) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((0x2 * ((uint128_t)x17 * x21)) + ((uint128_t)x16 * x19))))))));  // only column with no 0x19-scaled products; feeds x60, whose low 46 bits become out[0]
+{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x19 * ((uint128_t)x16 * x30)));
+{ uint128_t x34 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((uint128_t)x15 * x19)))))) + (0x19 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x19 * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
+{ uint128_t x36 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (0x19 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x19 * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
+{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x19 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x19 * ((0x2 * ((uint128_t)x7 * x30)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((0x2 * ((uint128_t)x17 * x23)) + (0x2 * ((uint128_t)x16 * x21))))))))));  // column holding x5*x19; the carry chain below starts here
+{ uint64_t x40 = (uint64_t) (x39 >> 0x2f);  // carry: bits of x39 above the low 47, truncated to 64 bits
+{ uint64_t x41 = ((uint64_t)x39 & 0x7fffffffffff);  // low 47 bits of x39
+{ uint128_t x42 = (x40 + x38);  // carry added into the next column; the same shift/mask/add pattern repeats through x60
+{ uint64_t x43 = (uint64_t) (x42 >> 0x2e);
+{ uint64_t x44 = ((uint64_t)x42 & 0x3fffffffffff);
+{ uint128_t x45 = (x43 + x37);
+{ uint64_t x46 = (uint64_t) (x45 >> 0x2e);
+{ uint64_t x47 = ((uint64_t)x45 & 0x3fffffffffff);
+{ uint128_t x48 = (x46 + x36);
+{ uint64_t x49 = (uint64_t) (x48 >> 0x2e);
+{ uint64_t x50 = ((uint64_t)x48 & 0x3fffffffffff);
+{ uint128_t x51 = (x49 + x35);
+{ uint64_t x52 = (uint64_t) (x51 >> 0x2e);
+{ uint64_t x53 = ((uint64_t)x51 & 0x3fffffffffff);
+{ uint128_t x54 = (x52 + x34);
+{ uint64_t x55 = (uint64_t) (x54 >> 0x2e);
+{ uint64_t x56 = ((uint64_t)x54 & 0x3fffffffffff);
+{ uint128_t x57 = (x55 + x33);
+{ uint64_t x58 = (uint64_t) (x57 >> 0x2e);
+{ uint64_t x59 = ((uint64_t)x57 & 0x3fffffffffff);
+{ uint128_t x60 = (x58 + x32);
+{ uint64_t x61 = (uint64_t) (x60 >> 0x2e);  // carry out of the last column, truncated to 64 bits
+{ uint64_t x62 = ((uint64_t)x60 & 0x3fffffffffff);
+{ uint64_t x63 = (x41 + (0x19 * x61));  // last carry x61 is scaled by 0x19 and added to the first piece x41, here in 64-bit arithmetic
+{ uint64_t x64 = (x63 >> 0x2f);
+{ uint64_t x65 = (x63 & 0x7fffffffffff);
+{ uint64_t x66 = (x64 + x44);  // second, shorter carry pass
+{ uint64_t x67 = (x66 >> 0x2e);
+{ uint64_t x68 = (x66 & 0x3fffffffffff);
+out[0] = x62;
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47;  // last carry x67 is added here without further masking
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; all eight elements (out[0]..out[7]) are written
diff --git a/src/Specific/solinas64_2e369m25/femul.h b/src/Specific/solinas64_2e369m25/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e369m25/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);  // Prototype only: takes a uint64_t pointer `out` plus sixteen uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e369m25/fesquare.c b/src/Specific/solinas64_2e369m25/fesquare.c
new file mode 100644
index 000000000..e4fdbf115
--- /dev/null
+++ b/src/Specific/solinas64_2e369m25/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // Forms 128-bit products in which both factors come from the same eight uint64_t arguments, carries between the eight resulting columns, and stores eight words in out[0..7]. NOTE(review): the 0x19 factors and the 47/46-bit shifts look like reduction modulo 2^369 - 25 (matching the directory name) — confirm against the generator.
+{ uint128_t x15 = (((uint128_t)x2 * x13) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x13 * x2))))))));  // only column with no 0x19-scaled products; symmetric terms such as x2*x13 and x13*x2 are written out separately rather than merged
+{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x19 * ((uint128_t)x13 * x13)));
+{ uint128_t x17 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x19 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x19 * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
+{ uint128_t x19 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x19 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x19 * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
+{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x19 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x19 * ((0x2 * ((uint128_t)x4 * x13)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + (0x2 * ((uint128_t)x13 * x4))))))))));  // column holding x2*x2; the carry chain below starts here
+{ uint64_t x23 = (uint64_t) (x22 >> 0x2f);  // carry: bits of x22 above the low 47, truncated to 64 bits
+{ uint64_t x24 = ((uint64_t)x22 & 0x7fffffffffff);  // low 47 bits of x22
+{ uint128_t x25 = (x23 + x21);  // carry added into the next column; the same shift/mask/add pattern repeats through x43
+{ uint64_t x26 = (uint64_t) (x25 >> 0x2e);
+{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffff);
+{ uint128_t x28 = (x26 + x20);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x2e);
+{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffff);
+{ uint128_t x31 = (x29 + x19);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x2e);
+{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffff);
+{ uint128_t x34 = (x32 + x18);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x2e);
+{ uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffff);
+{ uint128_t x37 = (x35 + x17);
+{ uint64_t x38 = (uint64_t) (x37 >> 0x2e);
+{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffff);
+{ uint128_t x40 = (x38 + x16);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x2e);
+{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffff);
+{ uint128_t x43 = (x41 + x15);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x2e);  // carry out of the last column, truncated to 64 bits
+{ uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffff);
+{ uint64_t x46 = (x24 + (0x19 * x44));  // last carry x44 is scaled by 0x19 and added to the first piece x24, here in 64-bit arithmetic
+{ uint64_t x47 = (x46 >> 0x2f);
+{ uint64_t x48 = (x46 & 0x7fffffffffff);
+{ uint64_t x49 = (x47 + x27);  // second, shorter carry pass
+{ uint64_t x50 = (x49 >> 0x2e);
+{ uint64_t x51 = (x49 & 0x3fffffffffff);
+out[0] = x45;
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30;  // last carry x50 is added here without further masking
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; all eight elements (out[0]..out[7]) are written
diff --git a/src/Specific/solinas64_2e369m25/fesquare.h b/src/Specific/solinas64_2e369m25/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e369m25/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype only: takes a uint64_t pointer `out` plus eight uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e369m25/freeze.c b/src/Specific/solinas64_2e369m25/freeze.c
new file mode 100644
index 000000000..780892147
--- /dev/null
+++ b/src/Specific/solinas64_2e369m25/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): not compilable as written — no '{' opens the body and the lines below contain non-C text; this looks like unrendered generator output and should be regenerated.
+out[0] = uint64_t x16;  // 'uint64_t x16' is a declaration, not an expression, so this assignment is invalid C
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 47 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;  // 'Op Syntax.SubWithGetBorrow ...' is not C syntax
+out[2] = x2;
+out[3] = 0x7fffffffffe7;;  // stray second ';' (an empty statement)
+}
+// caller: uint64_t out[4]; NOTE(review): this signature takes eight input values, so a four-element result looks inconsistent — verify after regenerating
diff --git a/src/Specific/solinas64_2e369m25/freeze.h b/src/Specific/solinas64_2e369m25/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e369m25/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype only: takes a uint64_t pointer `out` plus eight uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e379m19/femul.c b/src/Specific/solinas64_2e379m19/femul.c
new file mode 100644
index 000000000..47cf1b36d
--- /dev/null
+++ b/src/Specific/solinas64_2e379m19/femul.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17)  // Forms all 49 pairwise products between the first seven and the last seven uint64_t arguments in 128-bit arithmetic, carries between the seven resulting columns, and stores seven words in out[0..6]. NOTE(review): the 0x13 factors and the 55/54-bit shifts look like reduction modulo 2^379 - 19 (matching the directory name) — confirm against the generator.
+{ uint128_t x28 = (((uint128_t)x5 * x26) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((0x2 * ((uint128_t)x15 * x19)) + ((uint128_t)x14 * x17)))))));  // only column with no 0x13-scaled products; feeds x52, whose low 54 bits become out[0]
+{ uint128_t x29 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((0x2 * ((uint128_t)x13 * x19)) + ((uint128_t)x15 * x17)))))) + (0x13 * ((uint128_t)x14 * x26)));
+{ uint128_t x30 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((0x2 * ((uint128_t)x11 * x19)) + ((uint128_t)x13 * x17))))) + (0x13 * (((uint128_t)x15 * x26) + ((uint128_t)x14 * x27))));
+{ uint128_t x31 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((0x2 * ((uint128_t)x9 * x19)) + ((uint128_t)x11 * x17)))) + (0x13 * (((uint128_t)x13 * x26) + (((uint128_t)x15 * x27) + ((uint128_t)x14 * x25)))));
+{ uint128_t x32 = ((((uint128_t)x5 * x21) + ((0x2 * ((uint128_t)x7 * x19)) + ((uint128_t)x9 * x17))) + (0x13 * (((uint128_t)x11 * x26) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x14 * x23))))));
+{ uint128_t x33 = ((((uint128_t)x5 * x19) + ((uint128_t)x7 * x17)) + (0x13 * (((uint128_t)x9 * x26) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x14 * x21)))))));
+{ uint128_t x34 = (((uint128_t)x5 * x17) + (0x13 * ((0x2 * ((uint128_t)x7 * x26)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + (0x2 * ((uint128_t)x14 * x19)))))))));  // column holding x5*x17; the carry chain below starts here
+{ uint128_t x35 = (x34 >> 0x37);  // carry: bits of x34 above the low 55 (kept as 128-bit here and for x38; later carries are truncated to 64 bits)
+{ uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);  // low 55 bits of x34
+{ uint128_t x37 = (x35 + x33);  // carry added into the next column; the same shift/mask/add pattern repeats through x52
+{ uint128_t x38 = (x37 >> 0x36);
+{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
+{ uint128_t x40 = (x38 + x32);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x36);
+{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffffff);
+{ uint128_t x43 = (x41 + x31);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x36);
+{ uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffffff);
+{ uint128_t x46 = (x44 + x30);
+{ uint64_t x47 = (uint64_t) (x46 >> 0x36);
+{ uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffffff);
+{ uint128_t x49 = (x47 + x29);
+{ uint64_t x50 = (uint64_t) (x49 >> 0x36);
+{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffffff);
+{ uint128_t x52 = (x50 + x28);
+{ uint64_t x53 = (uint64_t) (x52 >> 0x36);  // carry out of the last column, truncated to 64 bits
+{ uint64_t x54 = ((uint64_t)x52 & 0x3fffffffffffff);
+{ uint128_t x55 = (x36 + ((uint128_t)0x13 * x53));  // last carry x53 is scaled by 0x13 and added to the first piece x36
+{ uint64_t x56 = (uint64_t) (x55 >> 0x37);
+{ uint64_t x57 = ((uint64_t)x55 & 0x7fffffffffffff);
+{ uint64_t x58 = (x56 + x39);  // second, shorter carry pass done in 64-bit arithmetic
+{ uint64_t x59 = (x58 >> 0x36);
+{ uint64_t x60 = (x58 & 0x3fffffffffffff);
+out[0] = x54;
+out[1] = x51;
+out[2] = x48;
+out[3] = x45;
+out[4] = x59 + x42;  // last carry x59 is added here without further masking
+out[5] = x60;
+out[6] = x57;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7]; all seven elements (out[0]..out[6]) are written
diff --git a/src/Specific/solinas64_2e379m19/femul.h b/src/Specific/solinas64_2e379m19/femul.h
new file mode 100644
index 000000000..ad4e84953
--- /dev/null
+++ b/src/Specific/solinas64_2e379m19/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17);  // Prototype only: takes a uint64_t pointer `out` plus fourteen uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e379m19/fesquare.c b/src/Specific/solinas64_2e379m19/fesquare.c
new file mode 100644
index 000000000..cfe4b4a05
--- /dev/null
+++ b/src/Specific/solinas64_2e379m19/fesquare.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // Forms 128-bit products in which both factors come from the same seven uint64_t arguments, carries between the seven resulting columns, and stores seven words in out[0..6]. NOTE(review): the 0x13 factors and the 55/54-bit shifts look like reduction modulo 2^379 - 19 (matching the directory name) — confirm against the generator.
+{ uint128_t x13 = (((uint128_t)x2 * x11) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x11 * x2)))))));  // only column with no 0x13-scaled products; symmetric terms such as x2*x11 and x11*x2 are written out separately rather than merged
+{ uint128_t x14 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x13 * ((uint128_t)x11 * x11)));
+{ uint128_t x15 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x13 * (((uint128_t)x12 * x11) + ((uint128_t)x11 * x12))));
+{ uint128_t x16 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x13 * (((uint128_t)x10 * x11) + (((uint128_t)x12 * x12) + ((uint128_t)x11 * x10)))));
+{ uint128_t x17 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x13 * (((uint128_t)x8 * x11) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((uint128_t)x11 * x8))))));
+{ uint128_t x18 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13 * (((uint128_t)x6 * x11) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((uint128_t)x11 * x6)))))));
+{ uint128_t x19 = (((uint128_t)x2 * x2) + (0x13 * ((0x2 * ((uint128_t)x4 * x11)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + (0x2 * ((uint128_t)x11 * x4)))))))));  // column holding x2*x2; the carry chain below starts here
+{ uint128_t x20 = (x19 >> 0x37);  // carry: bits of x19 above the low 55 (kept as 128-bit here and for x23; later carries are truncated to 64 bits)
+{ uint64_t x21 = ((uint64_t)x19 & 0x7fffffffffffff);  // low 55 bits of x19
+{ uint128_t x22 = (x20 + x18);  // carry added into the next column; the same shift/mask/add pattern repeats through x37
+{ uint128_t x23 = (x22 >> 0x36);
+{ uint64_t x24 = ((uint64_t)x22 & 0x3fffffffffffff);
+{ uint128_t x25 = (x23 + x17);
+{ uint64_t x26 = (uint64_t) (x25 >> 0x36);
+{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x36);
+{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
+{ uint128_t x31 = (x29 + x15);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x36);
+{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffffff);
+{ uint128_t x34 = (x32 + x14);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x36);
+{ uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffffff);
+{ uint128_t x37 = (x35 + x13);
+{ uint64_t x38 = (uint64_t) (x37 >> 0x36);  // carry out of the last column, truncated to 64 bits
+{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
+{ uint128_t x40 = (x21 + ((uint128_t)0x13 * x38));  // last carry x38 is scaled by 0x13 and added to the first piece x21
+{ uint64_t x41 = (uint64_t) (x40 >> 0x37);
+{ uint64_t x42 = ((uint64_t)x40 & 0x7fffffffffffff);
+{ uint64_t x43 = (x41 + x24);  // second, shorter carry pass done in 64-bit arithmetic
+{ uint64_t x44 = (x43 >> 0x36);
+{ uint64_t x45 = (x43 & 0x3fffffffffffff);
+out[0] = x39;
+out[1] = x36;
+out[2] = x33;
+out[3] = x30;
+out[4] = x44 + x27;  // last carry x44 is added here without further masking
+out[5] = x45;
+out[6] = x42;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[7]; all seven elements (out[0]..out[6]) are written
diff --git a/src/Specific/solinas64_2e379m19/fesquare.h b/src/Specific/solinas64_2e379m19/fesquare.h
new file mode 100644
index 000000000..fef33c926
--- /dev/null
+++ b/src/Specific/solinas64_2e379m19/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype only: takes a uint64_t pointer `out` plus seven uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e379m19/freeze.c b/src/Specific/solinas64_2e379m19/freeze.c
new file mode 100644
index 000000000..32b8dc691
--- /dev/null
+++ b/src/Specific/solinas64_2e379m19/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)  // NOTE(review): not compilable as written — no '{' opens the body and the lines below contain non-C text; this looks like unrendered generator output and should be regenerated.
+out[0] = uint64_t x14;  // 'uint64_t x14' is a declaration, not an expression, so this assignment is invalid C
+out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 55 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;  // 'Op Syntax.SubWithGetBorrow ...' is not C syntax
+out[2] = x2;
+out[3] = 0x7fffffffffffed;;  // stray second ';' (an empty statement)
+}
+// caller: uint64_t out[4]; NOTE(review): this signature takes seven input values, so a four-element result looks inconsistent — verify after regenerating
diff --git a/src/Specific/solinas64_2e379m19/freeze.h b/src/Specific/solinas64_2e379m19/freeze.h
new file mode 100644
index 000000000..b2c28ccf1
--- /dev/null
+++ b/src/Specific/solinas64_2e379m19/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);  // Prototype only: takes a uint64_t pointer `out` plus seven uint64_t values and returns void. NOTE(review): `out` is presumably the result buffer — confirm against the definition.
diff --git a/src/Specific/solinas64_2e382m105/femul.c b/src/Specific/solinas64_2e382m105/femul.c
new file mode 100644
index 000000000..f187d84a1
--- /dev/null
+++ b/src/Specific/solinas64_2e382m105/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23) // femul: combines the 10 limbs x20,x21,x19,...,x5 with the 10 limbs x38,x39,x37,...,x23 into ten 128-bit column sums, carries them, and writes 10 limbs to out[0..9]. NOTE(review): 0x69 = 105 matches "m105" in the directory name, so this is presumably multiplication mod 2^382 - 105 -- confirm.
+{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23)))))))))); // x40..x49: column sums; each operand is cast to uint128_t before multiplying so the products are 128-bit. The carry chain below ends at this column.
+{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x69 * ((uint128_t)x20 * x38))); // NOTE(review): terms scaled by 0x69 are presumably products that wrap past the top limb, and the 0x2 factors presumably compensate for the mixed 39/38-bit limb widths -- confirm against the generator.
+{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x69 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
+{ uint128_t x43 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x17 * x23))))))) + (0x69 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
+{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x69 * ((0x2 * ((uint128_t)x17 * x38)) + ((0x2 * ((uint128_t)x19 * x39)) + ((0x2 * ((uint128_t)x21 * x37)) + (0x2 * ((uint128_t)x20 * x35)))))));
+{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x69 * (((uint128_t)x15 * x38) + ((0x2 * ((uint128_t)x17 * x39)) + ((0x2 * ((uint128_t)x19 * x37)) + ((0x2 * ((uint128_t)x21 * x35)) + ((uint128_t)x20 * x33)))))));
+{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x69 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + ((0x2 * ((uint128_t)x17 * x37)) + ((0x2 * ((uint128_t)x19 * x35)) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
+{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x69 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + ((0x2 * ((uint128_t)x17 * x35)) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
+{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x69 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
+{ uint128_t x49 = (((uint128_t)x5 * x23) + (0x69 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25)))))))))))); // the carry chain starts at this column
+{ uint64_t x50 = (uint64_t) (x49 >> 0x27); // carry chain: 0x27 = 39, so x50 is everything above the low 39 bits of x49
+{ uint64_t x51 = ((uint64_t)x49 & 0x7fffffffff); // low 39 bits of x49
+{ uint128_t x52 = (x50 + x48); // each step adds the previous carry to the next column, then splits it again
+{ uint64_t x53 = (uint64_t) (x52 >> 0x26); // 0x26 = 38; the mask 0x3fffffffff on the next line keeps the low 38 bits
+{ uint64_t x54 = ((uint64_t)x52 & 0x3fffffffff);
+{ uint128_t x55 = (x53 + x47);
+{ uint64_t x56 = (uint64_t) (x55 >> 0x26);
+{ uint64_t x57 = ((uint64_t)x55 & 0x3fffffffff);
+{ uint128_t x58 = (x56 + x46);
+{ uint64_t x59 = (uint64_t) (x58 >> 0x26);
+{ uint64_t x60 = ((uint64_t)x58 & 0x3fffffffff);
+{ uint128_t x61 = (x59 + x45);
+{ uint64_t x62 = (uint64_t) (x61 >> 0x26);
+{ uint64_t x63 = ((uint64_t)x61 & 0x3fffffffff);
+{ uint128_t x64 = (x62 + x44);
+{ uint64_t x65 = (uint64_t) (x64 >> 0x27); // this column is split at 39 bits again, like the first one
+{ uint64_t x66 = ((uint64_t)x64 & 0x7fffffffff);
+{ uint128_t x67 = (x65 + x43);
+{ uint64_t x68 = (uint64_t) (x67 >> 0x26);
+{ uint64_t x69 = ((uint64_t)x67 & 0x3fffffffff);
+{ uint128_t x70 = (x68 + x42);
+{ uint64_t x71 = (uint64_t) (x70 >> 0x26);
+{ uint64_t x72 = ((uint64_t)x70 & 0x3fffffffff);
+{ uint128_t x73 = (x71 + x41);
+{ uint64_t x74 = (uint64_t) (x73 >> 0x26);
+{ uint64_t x75 = ((uint64_t)x73 & 0x3fffffffff);
+{ uint128_t x76 = (x74 + x40);
+{ uint64_t x77 = (uint64_t) (x76 >> 0x26); // carry out of the last column
+{ uint64_t x78 = ((uint64_t)x76 & 0x3fffffffff);
+{ uint64_t x79 = (x51 + (0x69 * x77)); // the final carry is scaled by 0x69 and folded back into the first column, now in 64-bit arithmetic
+{ uint64_t x80 = (x79 >> 0x27);
+{ uint64_t x81 = (x79 & 0x7fffffffff);
+{ uint64_t x82 = (x80 + x54); // second, short carry pass over the first two columns
+{ uint64_t x83 = (x82 >> 0x26);
+{ uint64_t x84 = (x82 & 0x3fffffffff);
+out[0] = x78; // out[0] holds the limb from the x40 column (end of the carry chain); out[9] holds the limb from the x49 column (start of the chain)
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57; // the last carry is added to x57 without another mask/carry step
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10]; (all ten entries out[0]..out[9] are written)
diff --git a/src/Specific/solinas64_2e382m105/femul.h b/src/Specific/solinas64_2e382m105/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas64_2e382m105/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23);
diff --git a/src/Specific/solinas64_2e382m105/fesquare.c b/src/Specific/solinas64_2e382m105/fesquare.c
new file mode 100644
index 000000000..fc560c74b
--- /dev/null
+++ b/src/Specific/solinas64_2e382m105/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fesquare: forms ten 128-bit column sums of products of the 10 input limbs x17,x18,x16,...,x2 with each other, carries them, and writes 10 limbs to out[0..9]. NOTE(review): 0x69 = 105 matches "m105" in the directory name, so this is presumably squaring mod 2^382 - 105 -- confirm.
+{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2)))))))))); // x19..x28: column sums; symmetric products (e.g. x2*x17 and x17*x2) are both written out rather than merged. The carry chain below ends at this column.
+{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x69 * ((uint128_t)x17 * x17))); // NOTE(review): terms scaled by 0x69 are presumably products that wrap past the top limb, and the 0x2 factors presumably compensate for the mixed 39/38-bit limb widths -- confirm against the generator.
+{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x69 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
+{ uint128_t x22 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x69 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
+{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x69 * ((0x2 * ((uint128_t)x14 * x17)) + ((0x2 * ((uint128_t)x16 * x18)) + ((0x2 * ((uint128_t)x18 * x16)) + (0x2 * ((uint128_t)x17 * x14)))))));
+{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x69 * (((uint128_t)x12 * x17) + ((0x2 * ((uint128_t)x14 * x18)) + ((0x2 * ((uint128_t)x16 * x16)) + ((0x2 * ((uint128_t)x18 * x14)) + ((uint128_t)x17 * x12)))))));
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x69 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + ((0x2 * ((uint128_t)x14 * x16)) + ((0x2 * ((uint128_t)x16 * x14)) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
+{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x69 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + ((0x2 * ((uint128_t)x14 * x14)) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
+{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x69 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
+{ uint128_t x28 = (((uint128_t)x2 * x2) + (0x69 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + (((uint128_t)x12 * x12) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4)))))))))))); // the carry chain starts at this column
+{ uint64_t x29 = (uint64_t) (x28 >> 0x27); // carry chain: 0x27 = 39, so x29 is everything above the low 39 bits of x28
+{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffff); // low 39 bits of x28
+{ uint128_t x31 = (x29 + x27); // each step adds the previous carry to the next column, then splits it again
+{ uint64_t x32 = (uint64_t) (x31 >> 0x26); // 0x26 = 38; the mask 0x3fffffffff on the next line keeps the low 38 bits
+{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffff);
+{ uint128_t x34 = (x32 + x26);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x26);
+{ uint64_t x36 = ((uint64_t)x34 & 0x3fffffffff);
+{ uint128_t x37 = (x35 + x25);
+{ uint64_t x38 = (uint64_t) (x37 >> 0x26);
+{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffff);
+{ uint128_t x40 = (x38 + x24);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x26);
+{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffff);
+{ uint128_t x43 = (x41 + x23);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x27); // this column is split at 39 bits again, like the first one
+{ uint64_t x45 = ((uint64_t)x43 & 0x7fffffffff);
+{ uint128_t x46 = (x44 + x22);
+{ uint64_t x47 = (uint64_t) (x46 >> 0x26);
+{ uint64_t x48 = ((uint64_t)x46 & 0x3fffffffff);
+{ uint128_t x49 = (x47 + x21);
+{ uint64_t x50 = (uint64_t) (x49 >> 0x26);
+{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffffff);
+{ uint128_t x52 = (x50 + x20);
+{ uint64_t x53 = (uint64_t) (x52 >> 0x26);
+{ uint64_t x54 = ((uint64_t)x52 & 0x3fffffffff);
+{ uint128_t x55 = (x53 + x19);
+{ uint64_t x56 = (uint64_t) (x55 >> 0x26); // carry out of the last column
+{ uint64_t x57 = ((uint64_t)x55 & 0x3fffffffff);
+{ uint64_t x58 = (x30 + (0x69 * x56)); // the final carry is scaled by 0x69 and folded back into the first column, now in 64-bit arithmetic
+{ uint64_t x59 = (x58 >> 0x27);
+{ uint64_t x60 = (x58 & 0x7fffffffff);
+{ uint64_t x61 = (x59 + x33); // second, short carry pass over the first two columns
+{ uint64_t x62 = (x61 >> 0x26);
+{ uint64_t x63 = (x61 & 0x3fffffffff);
+out[0] = x57; // out[0] holds the limb from the x19 column (end of the carry chain); out[9] holds the limb from the x28 column (start of the chain)
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36; // the last carry is added to x36 without another mask/carry step
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10]; (all ten entries out[0]..out[9] are written)
diff --git a/src/Specific/solinas64_2e382m105/fesquare.h b/src/Specific/solinas64_2e382m105/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas64_2e382m105/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e382m105/freeze.c b/src/Specific/solinas64_2e382m105/freeze.c
new file mode 100644
index 000000000..df882f8dc
--- /dev/null
+++ b/src/Specific/solinas64_2e382m105/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration used as an expression, raw "Op Syntax.SubWithGetBorrow" text); it looks like unfinished generator output -- regenerate before compiling.
+out[0] = uint64_t x20; // declaration on the right-hand side of an assignment: not C
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 39 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // un-lowered operation text, presumably a 39-bit subtract-with-borrow -- TODO confirm
+out[2] = x2; // the only input that is referenced; x4..x18 are unused
+out[3] = 0x7fffffff97;; // constant equals 2^39 - 105
+}
+// caller: uint64_t out[4]; NOTE(review): only out[0]..out[3] are assigned although 10 limbs are passed in -- confirm the intended output size once the body is regenerated.
diff --git a/src/Specific/solinas64_2e382m105/freeze.h b/src/Specific/solinas64_2e382m105/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas64_2e382m105/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c b/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c
new file mode 100644
index 000000000..d43fdd216
--- /dev/null
+++ b/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration used as an expression, a non-C type name, raw "Op Syntax.SubWithGetBorrow" text); it looks like unfinished generator output -- regenerate before compiling.
+out[0] = uint64_t x16; // declaration on the right-hand side of an assignment: not C
+out[1] = ℤ x17 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TZ 0x0; // un-lowered operation text; the result type here is printed as an unbounded integer rather than a C type -- presumably bounds inference failed, TODO confirm
+out[2] = x2; // the only input that is referenced; x4..x14 are unused
+out[3] = 0xffffffff;; // constant equals 2^32 - 1
+}
+// caller: uint64_t out[4]; NOTE(review): only out[0]..out[3] are assigned although 8 limbs are passed in -- confirm the intended output size once the body is regenerated.
diff --git a/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.h b/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e384m2e128m2e96p2e32m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e384m317/femul.c b/src/Specific/solinas64_2e384m317/femul.c
new file mode 100644
index 000000000..69a256342
--- /dev/null
+++ b/src/Specific/solinas64_2e384m317/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // femul: combines the 8 limbs x16,x17,x15,...,x5 with the 8 limbs x30,x31,x29,...,x19 into eight 128-bit column sums, carries them, and writes 8 limbs to out[0..7]. NOTE(review): 0x13d = 317 matches "m317" in the directory name, so this is presumably multiplication mod 2^384 - 317 -- confirm.
+{ uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19)))))))); // x32..x39: column sums; each operand is cast to uint128_t before multiplying so the products are 128-bit. The carry chain below ends at this column.
+{ uint128_t x33 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + (((uint128_t)x15 * x21) + ((uint128_t)x17 * x19))))))) + (0x13d * ((uint128_t)x16 * x30))); // NOTE(review): terms scaled by 0x13d are presumably products that wrap past the top limb -- confirm against the generator.
+{ uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x13d * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+{ uint128_t x35 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + ((uint128_t)x13 * x19))))) + (0x13d * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
+{ uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x13d * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+{ uint128_t x37 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (0x13d * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
+{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x13d * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x13d * (((uint128_t)x7 * x30) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x16 * x21))))))))); // the carry chain starts at this column
+{ uint64_t x40 = (uint64_t) (x39 >> 0x30); // carry chain: 0x30 = 48, so x40 is everything above the low 48 bits of x39
+{ uint64_t x41 = ((uint64_t)x39 & 0xffffffffffff); // low 48 bits of x39
+{ uint128_t x42 = (x40 + x38); // each step adds the previous carry to the next column, then splits it at 48 bits again
+{ uint64_t x43 = (uint64_t) (x42 >> 0x30);
+{ uint64_t x44 = ((uint64_t)x42 & 0xffffffffffff);
+{ uint128_t x45 = (x43 + x37);
+{ uint64_t x46 = (uint64_t) (x45 >> 0x30);
+{ uint64_t x47 = ((uint64_t)x45 & 0xffffffffffff);
+{ uint128_t x48 = (x46 + x36);
+{ uint64_t x49 = (uint64_t) (x48 >> 0x30);
+{ uint64_t x50 = ((uint64_t)x48 & 0xffffffffffff);
+{ uint128_t x51 = (x49 + x35);
+{ uint64_t x52 = (uint64_t) (x51 >> 0x30);
+{ uint64_t x53 = ((uint64_t)x51 & 0xffffffffffff);
+{ uint128_t x54 = (x52 + x34);
+{ uint64_t x55 = (uint64_t) (x54 >> 0x30);
+{ uint64_t x56 = ((uint64_t)x54 & 0xffffffffffff);
+{ uint128_t x57 = (x55 + x33);
+{ uint64_t x58 = (uint64_t) (x57 >> 0x30);
+{ uint64_t x59 = ((uint64_t)x57 & 0xffffffffffff);
+{ uint128_t x60 = (x58 + x32);
+{ uint64_t x61 = (uint64_t) (x60 >> 0x30); // carry out of the last column
+{ uint64_t x62 = ((uint64_t)x60 & 0xffffffffffff);
+{ uint64_t x63 = (x41 + (0x13d * x61)); // the final carry is scaled by 0x13d and folded back into the first column, now in 64-bit arithmetic
+{ uint64_t x64 = (x63 >> 0x30);
+{ uint64_t x65 = (x63 & 0xffffffffffff);
+{ uint64_t x66 = (x64 + x44); // second, short carry pass over the first two columns
+{ uint64_t x67 = (x66 >> 0x30);
+{ uint64_t x68 = (x66 & 0xffffffffffff);
+out[0] = x62; // out[0] holds the limb from the x32 column (end of the carry chain); out[7] holds the limb from the x39 column (start of the chain)
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // the last carry is added to x47 without another mask/carry step
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (all eight entries out[0]..out[7] are written)
diff --git a/src/Specific/solinas64_2e384m317/femul.h b/src/Specific/solinas64_2e384m317/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e384m317/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19);
diff --git a/src/Specific/solinas64_2e384m317/fesquare.c b/src/Specific/solinas64_2e384m317/fesquare.c
new file mode 100644
index 000000000..0bb54dc0d
--- /dev/null
+++ b/src/Specific/solinas64_2e384m317/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fesquare: forms eight 128-bit column sums of products of the 8 input limbs x13,x14,x12,...,x2 with each other, carries them, and writes 8 limbs to out[0..7]. NOTE(review): 0x13d = 317 matches "m317" in the directory name, so this is presumably squaring mod 2^384 - 317 -- confirm.
+{ uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2)))))))); // x15..x22: column sums; symmetric products (e.g. x2*x13 and x13*x2) are both written out rather than merged. The carry chain below ends at this column.
+{ uint128_t x16 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x13d * ((uint128_t)x13 * x13))); // NOTE(review): terms scaled by 0x13d are presumably products that wrap past the top limb -- confirm against the generator.
+{ uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x13d * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+{ uint128_t x18 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x13d * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
+{ uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x13d * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+{ uint128_t x20 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x13d * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
+{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x13d * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x13d * (((uint128_t)x4 * x13) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + ((uint128_t)x13 * x4))))))))); // the carry chain starts at this column
+{ uint64_t x23 = (uint64_t) (x22 >> 0x30); // carry chain: 0x30 = 48, so x23 is everything above the low 48 bits of x22
+{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffff); // low 48 bits of x22
+{ uint128_t x25 = (x23 + x21); // each step adds the previous carry to the next column, then splits it at 48 bits again
+{ uint64_t x26 = (uint64_t) (x25 >> 0x30);
+{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffff);
+{ uint128_t x28 = (x26 + x20);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x30);
+{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffff);
+{ uint128_t x31 = (x29 + x19);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x30);
+{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffff);
+{ uint128_t x34 = (x32 + x18);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x30);
+{ uint64_t x36 = ((uint64_t)x34 & 0xffffffffffff);
+{ uint128_t x37 = (x35 + x17);
+{ uint64_t x38 = (uint64_t) (x37 >> 0x30);
+{ uint64_t x39 = ((uint64_t)x37 & 0xffffffffffff);
+{ uint128_t x40 = (x38 + x16);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x30);
+{ uint64_t x42 = ((uint64_t)x40 & 0xffffffffffff);
+{ uint128_t x43 = (x41 + x15);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x30); // carry out of the last column
+{ uint64_t x45 = ((uint64_t)x43 & 0xffffffffffff);
+{ uint64_t x46 = (x24 + (0x13d * x44)); // the final carry is scaled by 0x13d and folded back into the first column, now in 64-bit arithmetic
+{ uint64_t x47 = (x46 >> 0x30);
+{ uint64_t x48 = (x46 & 0xffffffffffff);
+{ uint64_t x49 = (x47 + x27); // second, short carry pass over the first two columns
+{ uint64_t x50 = (x49 >> 0x30);
+{ uint64_t x51 = (x49 & 0xffffffffffff);
+out[0] = x45; // out[0] holds the limb from the x15 column (end of the carry chain); out[7] holds the limb from the x22 column (start of the chain)
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // the last carry is added to x30 without another mask/carry step
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (all eight entries out[0]..out[7] are written)
diff --git a/src/Specific/solinas64_2e384m317/fesquare.h b/src/Specific/solinas64_2e384m317/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e384m317/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e384m317/freeze.c b/src/Specific/solinas64_2e384m317/freeze.c
new file mode 100644
index 000000000..080bf44e2
--- /dev/null
+++ b/src/Specific/solinas64_2e384m317/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration used as an expression, raw "Op Syntax.SubWithGetBorrow" text); it looks like unfinished generator output -- regenerate before compiling.
+out[0] = uint64_t x16; // declaration on the right-hand side of an assignment: not C
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // un-lowered operation text, presumably a 48-bit subtract-with-borrow -- TODO confirm
+out[2] = x2; // the only input that is referenced; x4..x14 are unused
+out[3] = 0xfffffffffec3;; // constant equals 2^48 - 317
+}
+// caller: uint64_t out[4]; NOTE(review): only out[0]..out[3] are assigned although 8 limbs are passed in -- confirm the intended output size once the body is regenerated.
diff --git a/src/Specific/solinas64_2e384m317/freeze.h b/src/Specific/solinas64_2e384m317/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e384m317/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e384m5x2e368m1/freeze.c b/src/Specific/solinas64_2e384m5x2e368m1/freeze.c
new file mode 100644
index 000000000..b66ff7c62
--- /dev/null
+++ b/src/Specific/solinas64_2e384m5x2e368m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration used as an expression, raw "Op Syntax.SubWithGetBorrow" text); it looks like unfinished generator output -- regenerate before compiling.
+out[0] = uint64_t x16; // declaration on the right-hand side of an assignment: not C
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // un-lowered operation text, presumably a 48-bit subtract-with-borrow -- TODO confirm
+out[2] = x2; // the only input that is referenced; x4..x14 are unused
+out[3] = 0xffffffffffff;; // constant equals 2^48 - 1
+}
+// caller: uint64_t out[4]; NOTE(review): only out[0]..out[3] are assigned although 8 limbs are passed in -- confirm the intended output size once the body is regenerated.
diff --git a/src/Specific/solinas64_2e384m5x2e368m1/freeze.h b/src/Specific/solinas64_2e384m5x2e368m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e384m5x2e368m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e384m79x2e376m1/freeze.c b/src/Specific/solinas64_2e384m79x2e376m1/freeze.c
new file mode 100644
index 000000000..b66ff7c62
--- /dev/null
+++ b/src/Specific/solinas64_2e384m79x2e376m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, a declaration used as an expression, raw "Op Syntax.SubWithGetBorrow" text); it looks like unfinished generator output -- regenerate before compiling.
+out[0] = uint64_t x16; // declaration on the right-hand side of an assignment: not C
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 48 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // un-lowered operation text, presumably a 48-bit subtract-with-borrow -- TODO confirm
+out[2] = x2; // the only input that is referenced; x4..x14 are unused
+out[3] = 0xffffffffffff;; // constant equals 2^48 - 1
+}
+// caller: uint64_t out[4]; NOTE(review): only out[0]..out[3] are assigned although 8 limbs are passed in -- confirm the intended output size once the body is regenerated.
diff --git a/src/Specific/solinas64_2e384m79x2e376m1/freeze.h b/src/Specific/solinas64_2e384m79x2e376m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e384m79x2e376m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e401m31/femul.c b/src/Specific/solinas64_2e401m31/femul.c
new file mode 100644
index 000000000..4ff9c6bda
--- /dev/null
+++ b/src/Specific/solinas64_2e401m31/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Multiplies two 8-word operands (x16,x17,x15..x5 and x30,x31,x29..x19, each word passed by value) and stores eight carried result words in out[0..7].
+{ uint128_t x32 = (((uint128_t)x5 * x30) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((0x2 * ((uint128_t)x17 * x21)) + ((uint128_t)x16 * x19)))))))); // x32..x39 are eight 128-bit column sums of word-by-word products; each operand word is widened to uint128_t before multiplying
+{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((0x2 * ((uint128_t)x13 * x23)) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x1f * ((uint128_t)x16 * x30))); // terms scaled by 0x1f (31) are presumably the wrapped-around high products of a reduction modulo 2^401 - 31 - TODO confirm
+{ uint128_t x34 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((0x2 * ((uint128_t)x11 * x23)) + ((0x2 * ((uint128_t)x13 * x21)) + ((uint128_t)x15 * x19)))))) + (0x1f * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x1f * (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))));
+{ uint128_t x36 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (0x1f * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x1f * (((uint128_t)x11 * x30) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + ((uint128_t)x16 * x25)))))));
+{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x1f * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x1f * ((0x2 * ((uint128_t)x7 * x30)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((0x2 * ((uint128_t)x17 * x23)) + (0x2 * ((uint128_t)x16 * x21)))))))))); // the carry chain below starts from this column
+{ uint64_t x40 = (uint64_t) (x39 >> 0x33); // first carry step splits at 0x33 (51) bits; every later step in the 128-bit chain splits at 0x32 (50) bits
+{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffff);
+{ uint128_t x42 = (x40 + x38); // each step adds the previous carry to the next column, then splits into carry (shift) and kept bits (mask)
+{ uint64_t x43 = (uint64_t) (x42 >> 0x32);
+{ uint64_t x44 = ((uint64_t)x42 & 0x3ffffffffffff);
+{ uint128_t x45 = (x43 + x37);
+{ uint64_t x46 = (uint64_t) (x45 >> 0x32);
+{ uint64_t x47 = ((uint64_t)x45 & 0x3ffffffffffff);
+{ uint128_t x48 = (x46 + x36);
+{ uint64_t x49 = (uint64_t) (x48 >> 0x32);
+{ uint64_t x50 = ((uint64_t)x48 & 0x3ffffffffffff);
+{ uint128_t x51 = (x49 + x35);
+{ uint64_t x52 = (uint64_t) (x51 >> 0x32);
+{ uint64_t x53 = ((uint64_t)x51 & 0x3ffffffffffff);
+{ uint128_t x54 = (x52 + x34);
+{ uint64_t x55 = (uint64_t) (x54 >> 0x32);
+{ uint64_t x56 = ((uint64_t)x54 & 0x3ffffffffffff);
+{ uint128_t x57 = (x55 + x33);
+{ uint64_t x58 = (uint64_t) (x57 >> 0x32);
+{ uint64_t x59 = ((uint64_t)x57 & 0x3ffffffffffff);
+{ uint128_t x60 = (x58 + x32);
+{ uint64_t x61 = (uint64_t) (x60 >> 0x32);
+{ uint64_t x62 = ((uint64_t)x60 & 0x3ffffffffffff);
+{ uint64_t x63 = (x41 + (0x1f * x61)); // the carry out of the last column (x61) is scaled by 0x1f and added back into the first kept word
+{ uint64_t x64 = (x63 >> 0x33); // two further 64-bit carry steps re-mask the words touched by the wrap-around
+{ uint64_t x65 = (x63 & 0x7ffffffffffff);
+{ uint64_t x66 = (x64 + x44);
+{ uint64_t x67 = (x66 >> 0x32);
+{ uint64_t x68 = (x66 & 0x3ffffffffffff);
+out[0] = x62; // results are stored in reverse of carry order: out[0] is the last column processed, out[7] the first
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // the final carry x67 is added to x47 without another mask
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the nested scopes opened by each "{ type x = ..." line above
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e401m31/femul.h b/src/Specific/solinas64_2e401m31/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e401m31/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Declaration for the femul defined in femul.c of this directory: an output pointer followed by two operands of eight 64-bit words each, all passed by value.
diff --git a/src/Specific/solinas64_2e401m31/fesquare.c b/src/Specific/solinas64_2e401m31/fesquare.c
new file mode 100644
index 000000000..b07c6ed53
--- /dev/null
+++ b/src/Specific/solinas64_2e401m31/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 8-word operand (x13,x14,x12..x2, each word passed by value) and stores eight carried result words in out[0..7].
+{ uint128_t x15 = (((uint128_t)x2 * x13) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x13 * x2)))))))); // x15..x22 are eight 128-bit column sums; symmetric products (e.g. x4*x14 and x14*x4) are written out separately rather than merged
+{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x1f * ((uint128_t)x13 * x13))); // terms scaled by 0x1f (31) are presumably the wrapped-around high products of a reduction modulo 2^401 - 31 - TODO confirm
+{ uint128_t x17 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x1f * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x1f * (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))));
+{ uint128_t x19 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x1f * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1f * (((uint128_t)x8 * x13) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + ((uint128_t)x13 * x8)))))));
+{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1f * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x1f * ((0x2 * ((uint128_t)x4 * x13)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + (0x2 * ((uint128_t)x13 * x4)))))))))); // the carry chain below starts from this column
+{ uint64_t x23 = (uint64_t) (x22 >> 0x33); // first carry step splits at 0x33 (51) bits; every later step in the 128-bit chain splits at 0x32 (50) bits
+{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffff);
+{ uint128_t x25 = (x23 + x21); // each step adds the previous carry to the next column, then splits into carry (shift) and kept bits (mask)
+{ uint64_t x26 = (uint64_t) (x25 >> 0x32);
+{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffff);
+{ uint128_t x28 = (x26 + x20);
+{ uint64_t x29 = (uint64_t) (x28 >> 0x32);
+{ uint64_t x30 = ((uint64_t)x28 & 0x3ffffffffffff);
+{ uint128_t x31 = (x29 + x19);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x32);
+{ uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffff);
+{ uint128_t x34 = (x32 + x18);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x32);
+{ uint64_t x36 = ((uint64_t)x34 & 0x3ffffffffffff);
+{ uint128_t x37 = (x35 + x17);
+{ uint64_t x38 = (uint64_t) (x37 >> 0x32);
+{ uint64_t x39 = ((uint64_t)x37 & 0x3ffffffffffff);
+{ uint128_t x40 = (x38 + x16);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x32);
+{ uint64_t x42 = ((uint64_t)x40 & 0x3ffffffffffff);
+{ uint128_t x43 = (x41 + x15);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x32);
+{ uint64_t x45 = ((uint64_t)x43 & 0x3ffffffffffff);
+{ uint64_t x46 = (x24 + (0x1f * x44)); // the carry out of the last column (x44) is scaled by 0x1f and added back into the first kept word
+{ uint64_t x47 = (x46 >> 0x33); // two further 64-bit carry steps re-mask the words touched by the wrap-around
+{ uint64_t x48 = (x46 & 0x7ffffffffffff);
+{ uint64_t x49 = (x47 + x27);
+{ uint64_t x50 = (x49 >> 0x32);
+{ uint64_t x51 = (x49 & 0x3ffffffffffff);
+out[0] = x45; // results are stored in reverse of carry order: out[0] is the last column processed, out[7] the first
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // the final carry x50 is added to x30 without another mask
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the nested scopes opened by each "{ type x = ..." line above
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e401m31/fesquare.h b/src/Specific/solinas64_2e401m31/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e401m31/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration for the fesquare defined in fesquare.c of this directory: an output pointer followed by one operand of eight 64-bit words passed by value.
diff --git a/src/Specific/solinas64_2e401m31/freeze.c b/src/Specific/solinas64_2e401m31/freeze.c
new file mode 100644
index 000000000..609dc230b
--- /dev/null
+++ b/src/Specific/solinas64_2e401m31/freeze.c
@@ -0,0 +1,26 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): only the first statement of the body is C; the rest is unlowered generator output and will not compile - regenerate before building.
+{ uint64_t x16; uint8_t x17 = _subborrow_u51(0x0, x2, 0x7ffffffffffe1, &x16); // _subborrow_u51 is not defined in this file (presumably liblow.h); by analogy with _subborrow_u64 this subtracts 0x7ffffffffffe1 (2^51 - 31) from x2 with no incoming borrow, difference in x16, borrow-out in x17 - TODO confirm
+out[0] = uint64_t x19; // a declaration on the right of an assignment: x19 is never given a value here
+out[1] = uint8_t x20 = Op Syntax.SubWithGetBorrow 50 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 x17; // unprinted IR text, presumably a 50-bit subtract-with-borrow taking x17 as borrow-in - TODO confirm against the generator
+out[2] = x4;
+out[3] = 0x3ffffffffffff;; // 50 one-bits, the same width as the 50 in the borrow op above
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e401m31/freeze.h b/src/Specific/solinas64_2e401m31/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e401m31/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration for the freeze defined in freeze.c of this directory: an output pointer followed by eight 64-bit words passed by value.
diff --git a/src/Specific/solinas64_2e413m21/femul.c b/src/Specific/solinas64_2e413m21/femul.c
new file mode 100644
index 000000000..458c45ca4
--- /dev/null
+++ b/src/Specific/solinas64_2e413m21/femul.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17) // Multiplies two 7-word operands (x14,x15,x13..x5 and x26,x27,x25..x17, each word passed by value) and stores seven carried result words in out[0..6]. NOTE(review): the x33, x34 and x37 lines use a non-C type, so this file does not compile as emitted.
+{ uint128_t x28 = (((uint128_t)x5 * x26) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + (((uint128_t)x15 * x19) + ((uint128_t)x14 * x17))))))); // x28..x34 are the seven column sums of word-by-word products; each operand word is widened to uint128_t before multiplying
+{ uint128_t x29 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + (((uint128_t)x11 * x21) + (((uint128_t)x13 * x19) + ((uint128_t)x15 * x17)))))) + (0x15 * ((uint128_t)x14 * x26))); // terms scaled by 0x15 (21) are presumably the wrapped-around high products of a reduction modulo 2^413 - 21 - TODO confirm
+{ uint128_t x30 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + (((uint128_t)x11 * x19) + ((uint128_t)x13 * x17))))) + (0x15 * (((uint128_t)x15 * x26) + ((uint128_t)x14 * x27))));
+{ uint128_t x31 = ((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + (((uint128_t)x9 * x19) + ((uint128_t)x11 * x17)))) + (0x15 * (((uint128_t)x13 * x26) + (((uint128_t)x15 * x27) + ((uint128_t)x14 * x25)))));
+{ uint128_t x32 = ((((uint128_t)x5 * x21) + (((uint128_t)x7 * x19) + ((uint128_t)x9 * x17))) + (0x15 * (((uint128_t)x11 * x26) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x14 * x23))))));
+{ ℤ x33 = ((((uint128_t)x5 * x19) + ((uint128_t)x7 * x17)) +ℤ (0x15 *ℤ (((uint128_t)x9 * x26) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x14 * x21))))))); // NOTE(review): "ℤ", "+ℤ" and "*ℤ" are not C; presumably the generator could not bound this value to 128 bits and printed an unbounded-integer placeholder - TODO confirm
+{ ℤ x34 = (((uint128_t)x5 * x17) +ℤ (0x15 *ℤ (((uint128_t)x7 * x26) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + (((uint128_t)x15 * x21) + ((uint128_t)x14 * x19)))))))); // same non-C placeholder type as x33; the carry chain below starts from this column
+{ uint128_t x35 = (x34 >> 0x3b); // every carry step splits at 0x3b (59) bits; carries in the main chain are kept in 128 bits
+{ uint64_t x36 = (x34 & 0x7ffffffffffffff);
+{ ℤ x37 = (x35 +ℤ x33); // same non-C placeholder type again
+{ uint128_t x38 = (x37 >> 0x3b);
+{ uint64_t x39 = (x37 & 0x7ffffffffffffff);
+{ uint128_t x40 = (x38 + x32); // each step adds the previous carry to the next column, then splits into carry (shift) and kept bits (mask)
+{ uint128_t x41 = (x40 >> 0x3b);
+{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
+{ uint128_t x43 = (x41 + x31);
+{ uint128_t x44 = (x43 >> 0x3b);
+{ uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffffff);
+{ uint128_t x46 = (x44 + x30);
+{ uint128_t x47 = (x46 >> 0x3b);
+{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffffff);
+{ uint128_t x49 = (x47 + x29);
+{ uint128_t x50 = (x49 >> 0x3b);
+{ uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffffff);
+{ uint128_t x52 = (x50 + x28);
+{ uint128_t x53 = (x52 >> 0x3b);
+{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffffff);
+{ uint128_t x55 = (x36 + (0x15 * x53)); // the carry out of the last column (x53) is scaled by 0x15 and added back into the first kept word, still in 128 bits
+{ uint64_t x56 = (uint64_t) (x55 >> 0x3b); // two further carry steps re-mask the words touched by the wrap-around
+{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffffff);
+{ uint64_t x58 = (x56 + x39);
+{ uint64_t x59 = (x58 >> 0x3b);
+{ uint64_t x60 = (x58 & 0x7ffffffffffffff);
+out[0] = x54; // results are stored in reverse of carry order: out[0] is the last column processed, out[6] the first
+out[1] = x51;
+out[2] = x48;
+out[3] = x45;
+out[4] = x59 + x42; // the final carry x59 is added to x42 without another mask
+out[5] = x60;
+out[6] = x57;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the nested scopes opened by each "{ type x = ..." line above
+// caller: uint64_t out[7];
diff --git a/src/Specific/solinas64_2e413m21/femul.h b/src/Specific/solinas64_2e413m21/femul.h
new file mode 100644
index 000000000..ad4e84953
--- /dev/null
+++ b/src/Specific/solinas64_2e413m21/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x14, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x26, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17); // Declaration for the femul defined in femul.c of this directory: an output pointer followed by two operands of seven 64-bit words each, all passed by value.
diff --git a/src/Specific/solinas64_2e413m21/fesquare.c b/src/Specific/solinas64_2e413m21/fesquare.c
new file mode 100644
index 000000000..0f513bb27
--- /dev/null
+++ b/src/Specific/solinas64_2e413m21/fesquare.c
@@ -0,0 +1,61 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 7-word operand (x11,x12,x10..x2, each word passed by value) and stores seven carried result words in out[0..6]. NOTE(review): the x18, x19 and x22 lines use a non-C type, so this file does not compile as emitted.
+{ uint128_t x13 = (((uint128_t)x2 * x11) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x11 * x2))))))); // x13..x19 are the seven column sums; symmetric products (e.g. x4*x12 and x12*x4) are written out separately rather than merged
+{ uint128_t x14 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x15 * ((uint128_t)x11 * x11))); // terms scaled by 0x15 (21) are presumably the wrapped-around high products of a reduction modulo 2^413 - 21 - TODO confirm
+{ uint128_t x15 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x15 * (((uint128_t)x12 * x11) + ((uint128_t)x11 * x12))));
+{ uint128_t x16 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x15 * (((uint128_t)x10 * x11) + (((uint128_t)x12 * x12) + ((uint128_t)x11 * x10)))));
+{ uint128_t x17 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x15 * (((uint128_t)x8 * x11) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((uint128_t)x11 * x8))))));
+{ ℤ x18 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) +ℤ (0x15 *ℤ (((uint128_t)x6 * x11) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((uint128_t)x11 * x6))))))); // NOTE(review): "ℤ", "+ℤ" and "*ℤ" are not C; presumably the generator could not bound this value to 128 bits and printed an unbounded-integer placeholder - TODO confirm
+{ ℤ x19 = (((uint128_t)x2 * x2) +ℤ (0x15 *ℤ (((uint128_t)x4 * x11) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((uint128_t)x11 * x4)))))))); // same non-C placeholder type as x18; the carry chain below starts from this column
+{ uint128_t x20 = (x19 >> 0x3b); // every carry step splits at 0x3b (59) bits; carries in the main chain are kept in 128 bits
+{ uint64_t x21 = (x19 & 0x7ffffffffffffff);
+{ ℤ x22 = (x20 +ℤ x18); // same non-C placeholder type again
+{ uint128_t x23 = (x22 >> 0x3b);
+{ uint64_t x24 = (x22 & 0x7ffffffffffffff);
+{ uint128_t x25 = (x23 + x17); // each step adds the previous carry to the next column, then splits into carry (shift) and kept bits (mask)
+{ uint128_t x26 = (x25 >> 0x3b);
+{ uint64_t x27 = ((uint64_t)x25 & 0x7ffffffffffffff);
+{ uint128_t x28 = (x26 + x16);
+{ uint128_t x29 = (x28 >> 0x3b);
+{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
+{ uint128_t x31 = (x29 + x15);
+{ uint128_t x32 = (x31 >> 0x3b);
+{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffffff);
+{ uint128_t x34 = (x32 + x14);
+{ uint128_t x35 = (x34 >> 0x3b);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffffff);
+{ uint128_t x37 = (x35 + x13);
+{ uint128_t x38 = (x37 >> 0x3b);
+{ uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffffff);
+{ uint128_t x40 = (x21 + (0x15 * x38)); // the carry out of the last column (x38) is scaled by 0x15 and added back into the first kept word, still in 128 bits
+{ uint64_t x41 = (uint64_t) (x40 >> 0x3b); // two further carry steps re-mask the words touched by the wrap-around
+{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
+{ uint64_t x43 = (x41 + x24);
+{ uint64_t x44 = (x43 >> 0x3b);
+{ uint64_t x45 = (x43 & 0x7ffffffffffffff);
+out[0] = x39; // results are stored in reverse of carry order: out[0] is the last column processed, out[6] the first
+out[1] = x36;
+out[2] = x33;
+out[3] = x30;
+out[4] = x44 + x27; // the final carry x44 is added to x27 without another mask
+out[5] = x45;
+out[6] = x42;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the nested scopes opened by each "{ type x = ..." line above
+// caller: uint64_t out[7];
diff --git a/src/Specific/solinas64_2e413m21/fesquare.h b/src/Specific/solinas64_2e413m21/fesquare.h
new file mode 100644
index 000000000..fef33c926
--- /dev/null
+++ b/src/Specific/solinas64_2e413m21/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration for the fesquare defined in fesquare.c of this directory: an output pointer followed by one operand of seven 64-bit words passed by value.
diff --git a/src/Specific/solinas64_2e413m21/freeze.c b/src/Specific/solinas64_2e413m21/freeze.c
new file mode 100644
index 000000000..3b53d6ed6
--- /dev/null
+++ b/src/Specific/solinas64_2e413m21/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace, declarations used as right-hand sides); it looks like generator output that was never lowered to C - regenerate before building.
+out[0] = uint64_t x14; // a declaration on the right of an assignment: x14 is never given a value here
+out[1] = uint8_t x15 = Op Syntax.SubWithGetBorrow 59 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // unprinted IR text, presumably a 59-bit subtract-with-borrow with borrow-in 0x0 - TODO confirm against the generator
+out[2] = x2; // only x2 of the seven input words is referenced in this body
+out[3] = 0x7ffffffffffffeb;; // equals 2^59 - 21: a 59-bit all-ones value minus 0x14
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e413m21/freeze.h b/src/Specific/solinas64_2e413m21/freeze.h
new file mode 100644
index 000000000..b2c28ccf1
--- /dev/null
+++ b/src/Specific/solinas64_2e413m21/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x11, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration for the freeze defined in freeze.c of this directory: an output pointer followed by seven 64-bit words passed by value.
diff --git a/src/Specific/solinas64_2e414m17/femul.c b/src/Specific/solinas64_2e414m17/femul.c
new file mode 100644
index 000000000..7d8d48e65
--- /dev/null
+++ b/src/Specific/solinas64_2e414m17/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21) // Multiplies two 9-word operands (x18,x19,x17..x5 and x34,x35,x33..x21, each word passed by value) and stores nine carried result words in out[0..8].
+{ uint128_t x36 = (((uint128_t)x5 * x34) + (((uint128_t)x7 * x35) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + (((uint128_t)x19 * x23) + ((uint128_t)x18 * x21))))))))); // x36..x44 are nine 128-bit column sums of word-by-word products; each operand word is widened to uint128_t before multiplying
+{ uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x11 * ((uint128_t)x18 * x34))); // terms scaled by 0x11 (17) are presumably the wrapped-around high products of a reduction modulo 2^414 - 17 - TODO confirm
+{ uint128_t x38 = ((((uint128_t)x5 * x33) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x17 * x21))))))) + (0x11 * (((uint128_t)x19 * x34) + ((uint128_t)x18 * x35))));
+{ uint128_t x39 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + ((uint128_t)x15 * x21)))))) + (0x11 * (((uint128_t)x17 * x34) + (((uint128_t)x19 * x35) + ((uint128_t)x18 * x33)))));
+{ uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x11 * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
+{ uint128_t x41 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21)))) + (0x11 * (((uint128_t)x13 * x34) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + ((uint128_t)x18 * x29)))))));
+{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + ((uint128_t)x9 * x21))) + (0x11 * (((uint128_t)x11 * x34) + (((uint128_t)x13 * x35) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + (((uint128_t)x19 * x29) + ((uint128_t)x18 * x27))))))));
+{ uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x11 * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
+{ uint128_t x44 = (((uint128_t)x5 * x21) + (0x11 * (((uint128_t)x7 * x34) + (((uint128_t)x9 * x35) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + (((uint128_t)x19 * x25) + ((uint128_t)x18 * x23)))))))))); // the carry chain below starts from this column
+{ uint64_t x45 = (uint64_t) (x44 >> 0x2e); // every carry step splits at 0x2e (46) bits
+{ uint64_t x46 = ((uint64_t)x44 & 0x3fffffffffff);
+{ uint128_t x47 = (x45 + x43); // each step adds the previous carry to the next column, then splits into carry (shift) and kept bits (mask)
+{ uint64_t x48 = (uint64_t) (x47 >> 0x2e);
+{ uint64_t x49 = ((uint64_t)x47 & 0x3fffffffffff);
+{ uint128_t x50 = (x48 + x42);
+{ uint64_t x51 = (uint64_t) (x50 >> 0x2e);
+{ uint64_t x52 = ((uint64_t)x50 & 0x3fffffffffff);
+{ uint128_t x53 = (x51 + x41);
+{ uint64_t x54 = (uint64_t) (x53 >> 0x2e);
+{ uint64_t x55 = ((uint64_t)x53 & 0x3fffffffffff);
+{ uint128_t x56 = (x54 + x40);
+{ uint64_t x57 = (uint64_t) (x56 >> 0x2e);
+{ uint64_t x58 = ((uint64_t)x56 & 0x3fffffffffff);
+{ uint128_t x59 = (x57 + x39);
+{ uint64_t x60 = (uint64_t) (x59 >> 0x2e);
+{ uint64_t x61 = ((uint64_t)x59 & 0x3fffffffffff);
+{ uint128_t x62 = (x60 + x38);
+{ uint64_t x63 = (uint64_t) (x62 >> 0x2e);
+{ uint64_t x64 = ((uint64_t)x62 & 0x3fffffffffff);
+{ uint128_t x65 = (x63 + x37);
+{ uint64_t x66 = (uint64_t) (x65 >> 0x2e);
+{ uint64_t x67 = ((uint64_t)x65 & 0x3fffffffffff);
+{ uint128_t x68 = (x66 + x36);
+{ uint64_t x69 = (uint64_t) (x68 >> 0x2e);
+{ uint64_t x70 = ((uint64_t)x68 & 0x3fffffffffff);
+{ uint64_t x71 = (x46 + (0x11 * x69)); // the carry out of the last column (x69) is scaled by 0x11 and added back into the first kept word
+{ uint64_t x72 = (x71 >> 0x2e); // two further 64-bit carry steps re-mask the words touched by the wrap-around
+{ uint64_t x73 = (x71 & 0x3fffffffffff);
+{ uint64_t x74 = (x72 + x49);
+{ uint64_t x75 = (x74 >> 0x2e);
+{ uint64_t x76 = (x74 & 0x3fffffffffff);
+out[0] = x70; // results are stored in reverse of carry order: out[0] is the last column processed, out[8] the first
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52; // the final carry x75 is added to x52 without another mask
+out[7] = x76;
+out[8] = x73;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the nested scopes opened by each "{ type x = ..." line above
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas64_2e414m17/femul.h b/src/Specific/solinas64_2e414m17/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas64_2e414m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21); // Declaration for the femul defined in femul.c of this directory: an output pointer followed by two operands of nine 64-bit words each, all passed by value.
diff --git a/src/Specific/solinas64_2e414m17/fesquare.c b/src/Specific/solinas64_2e414m17/fesquare.c
new file mode 100644
index 000000000..d07123153
--- /dev/null
+++ b/src/Specific/solinas64_2e414m17/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 9-word operand (x15,x16,x14..x2, each word passed by value) and stores nine carried result words in out[0..8].
+{ uint128_t x17 = (((uint128_t)x2 * x15) + (((uint128_t)x4 * x16) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + (((uint128_t)x16 * x4) + ((uint128_t)x15 * x2))))))))); // x17..x25 are nine 128-bit column sums; symmetric products (e.g. x4*x16 and x16*x4) are written out separately rather than merged
+{ uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x11 * ((uint128_t)x15 * x15))); // terms scaled by 0x11 (17) are presumably the wrapped-around high products of a reduction modulo 2^414 - 17 - TODO confirm
+{ uint128_t x19 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x11 * (((uint128_t)x16 * x15) + ((uint128_t)x15 * x16))));
+{ uint128_t x20 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x15) + (((uint128_t)x16 * x16) + ((uint128_t)x15 * x14)))));
+{ uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x11 * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
+{ uint128_t x22 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x15) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + ((uint128_t)x15 * x10)))))));
+{ uint128_t x23 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)x8 * x15) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + ((uint128_t)x15 * x8))))))));
+{ uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
+{ uint128_t x25 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)x4 * x15) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + ((uint128_t)x15 * x4)))))))))); // the carry chain below starts from this column
+{ uint64_t x26 = (uint64_t) (x25 >> 0x2e); // every carry step splits at 0x2e (46) bits
+{ uint64_t x27 = ((uint64_t)x25 & 0x3fffffffffff);
+{ uint128_t x28 = (x26 + x24); // each step adds the previous carry to the next column, then splits into carry (shift) and kept bits (mask)
+{ uint64_t x29 = (uint64_t) (x28 >> 0x2e);
+{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffff);
+{ uint128_t x31 = (x29 + x23);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x2e);
+{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffff);
+{ uint128_t x34 = (x32 + x22);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x2e);
+{ uint64_t x36 = ((uint64_t)x34 & 0x3fffffffffff);
+{ uint128_t x37 = (x35 + x21);
+{ uint64_t x38 = (uint64_t) (x37 >> 0x2e);
+{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffff);
+{ uint128_t x40 = (x38 + x20);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x2e);
+{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffff);
+{ uint128_t x43 = (x41 + x19);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x2e);
+{ uint64_t x45 = ((uint64_t)x43 & 0x3fffffffffff);
+{ uint128_t x46 = (x44 + x18);
+{ uint64_t x47 = (uint64_t) (x46 >> 0x2e);
+{ uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffff);
+{ uint128_t x49 = (x47 + x17);
+{ uint64_t x50 = (uint64_t) (x49 >> 0x2e);
+{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffff);
+{ uint64_t x52 = (x27 + (0x11 * x50)); // the carry out of the last column (x50) is scaled by 0x11 and added back into the first kept word
+{ uint64_t x53 = (x52 >> 0x2e); // two further 64-bit carry steps re-mask the words touched by the wrap-around
+{ uint64_t x54 = (x52 & 0x3fffffffffff);
+{ uint64_t x55 = (x53 + x30);
+{ uint64_t x56 = (x55 >> 0x2e);
+{ uint64_t x57 = (x55 & 0x3fffffffffff);
+out[0] = x51; // results are stored in reverse of carry order: out[0] is the last column processed, out[8] the first
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33; // the final carry x56 is added to x33 without another mask
+out[7] = x57;
+out[8] = x54;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // closes the nested scopes opened by each "{ type x = ..." line above
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas64_2e414m17/fesquare.h b/src/Specific/solinas64_2e414m17/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas64_2e414m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Declaration for the fesquare defined in fesquare.c of this directory: an output pointer followed by one operand of nine 64-bit words passed by value.
diff --git a/src/Specific/solinas64_2e414m17/freeze.c b/src/Specific/solinas64_2e414m17/freeze.c
new file mode 100644
index 000000000..9098c4e42
--- /dev/null
+++ b/src/Specific/solinas64_2e414m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C — there is no opening '{' and the statements are leaked generator text, so this file cannot compile as committed. TODO regenerate.
+out[0] = uint64_t x18; // a declaration on the right-hand side of an assignment: not C
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 46 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw 'Op Syntax.SubWithGetBorrow 46 ...' term printed verbatim; presumably a 46-bit subtract-with-borrow that had no C rendering — verify against the generator
+out[2] = x2; // x2 is the last parameter of freeze
+out[3] = 0x3fffffffffef;; // 0x3fffffffffef == 2^46 - 17; note the stray second ';'
+}
+// caller: uint64_t out[4]; NOTE(review): the 4 follows from the malformed body above; fesquare in this same directory documents out[9] — confirm the real size after regenerating.
diff --git a/src/Specific/solinas64_2e414m17/freeze.h b/src/Specific/solinas64_2e414m17/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas64_2e414m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: nine uint64_t operands by value plus an out pointer. NOTE(review): the matching freeze.c body is malformed generator output, so this declaration currently has no compilable definition.
diff --git a/src/Specific/solinas64_2e416m2e208m1/femul.c b/src/Specific/solinas64_2e416m2e208m1/femul.c
new file mode 100644
index 000000000..fc04758dc
--- /dev/null
+++ b/src/Specific/solinas64_2e416m2e208m1/femul.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Combines the 8-word operand x16..x5 with the 8-word operand x30..x19 and stores 8 words in out[0..7]. Straight-line code: every intermediate is a fresh variable in its own nested scope. NOTE(review): appears machine-generated (commit message: "Update display logs and c files") — prefer regenerating over hand-editing.
+{ uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25)); // x32..x38: products of word sums minus the products of the first addends. The sums (x11 + x16 etc.) are formed in 64 bits before widening; assumes inputs are small enough not to wrap — bounds not visible here, TODO confirm.
+{ uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
+{ uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
+{ uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))));
+{ uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
+{ uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
+{ uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
+{ uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32); // x39..x45: 128-bit column sums of plain word products plus the x32..x38 terms above
+{ uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
+{ uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
+{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
+{ uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
+{ uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
+{ uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
+{ uint64_t x46 = (uint64_t) (x42 >> 0x34); // from here on: split each sum at bit 52 (0x34) — high part is the carry, low 52 bits (mask 0xfffffffffffff) the word
+{ uint64_t x47 = ((uint64_t)x42 & 0xfffffffffffff);
+{ uint64_t x48 = (uint64_t) (x35 >> 0x34);
+{ uint64_t x49 = ((uint64_t)x35 & 0xfffffffffffff);
+{ uint128_t x50 = (((uint128_t)0x10000000000000 * x48) + x49); // re-joins x48 and x49, the two pieces of x35 split on the previous two lines (0x10000000000000 == 2^52)
+{ uint64_t x51 = (uint64_t) (x50 >> 0x34);
+{ uint64_t x52 = ((uint64_t)x50 & 0xfffffffffffff);
+{ uint128_t x53 = ((x46 + x41) + x51); // x51 is added into two different columns: here and into x56 below
+{ uint64_t x54 = (uint64_t) (x53 >> 0x34);
+{ uint64_t x55 = ((uint64_t)x53 & 0xfffffffffffff);
+{ uint128_t x56 = (x45 + x51);
+{ uint64_t x57 = (uint64_t) (x56 >> 0x34);
+{ uint64_t x58 = ((uint64_t)x56 & 0xfffffffffffff);
+{ uint128_t x59 = (x54 + x40);
+{ uint64_t x60 = (uint64_t) (x59 >> 0x34);
+{ uint64_t x61 = ((uint64_t)x59 & 0xfffffffffffff);
+{ uint128_t x62 = (x57 + x44);
+{ uint64_t x63 = (uint64_t) (x62 >> 0x34);
+{ uint64_t x64 = ((uint64_t)x62 & 0xfffffffffffff);
+{ uint128_t x65 = (x60 + x39);
+{ uint64_t x66 = (uint64_t) (x65 >> 0x34);
+{ uint64_t x67 = ((uint64_t)x65 & 0xfffffffffffff);
+{ uint128_t x68 = (x63 + x43);
+{ uint64_t x69 = (uint64_t) (x68 >> 0x34);
+{ uint64_t x70 = ((uint64_t)x68 & 0xfffffffffffff);
+{ uint64_t x71 = (x66 + x52); // remaining carries are handled in 64-bit arithmetic
+{ uint64_t x72 = (x71 >> 0x34);
+{ uint64_t x73 = (x71 & 0xfffffffffffff);
+{ uint64_t x74 = (x69 + x47);
+{ uint64_t x75 = (x74 >> 0x34);
+{ uint64_t x76 = (x74 & 0xfffffffffffff);
+{ uint64_t x77 = ((0x10000000000000 * x72) + x73); // re-joins x72 and x73, the two pieces of x71
+{ uint64_t x78 = (x77 >> 0x34);
+{ uint64_t x79 = (x77 & 0xfffffffffffff);
+{ uint64_t x80 = ((x75 + x55) + x78); // x78 is likewise added into two columns (x80 and x83)
+{ uint64_t x81 = (x80 >> 0x34);
+{ uint64_t x82 = (x80 & 0xfffffffffffff);
+{ uint64_t x83 = (x58 + x78);
+{ uint64_t x84 = (x83 >> 0x34);
+{ uint64_t x85 = (x83 & 0xfffffffffffff);
+out[0] = x79; // six of the eight outputs are 52-bit masked values
+out[1] = x67;
+out[2] = x81 + x61; // carry x81 added to masked word x61 without re-masking, so this word may exceed 52 bits
+out[3] = x82;
+out[4] = x76;
+out[5] = x70;
+out[6] = x84 + x64; // same pattern as out[2]: carry plus masked word, not re-masked
+out[7] = x85;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e416m2e208m1/femul.h b/src/Specific/solinas64_2e416m2e208m1/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e416m2e208m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype: two 8-word operands passed word-by-word by value, result written through out (the matching .c stores out[0]..out[7]). NOTE(review): force_inline expands to __attribute__((always_inline)) on a declaration that is neither inline nor static — confirm the compiler accepts this without warnings.
diff --git a/src/Specific/solinas64_2e416m2e208m1/fesquare.c b/src/Specific/solinas64_2e416m2e208m1/fesquare.c
new file mode 100644
index 000000000..2cca42bdd
--- /dev/null
+++ b/src/Specific/solinas64_2e416m2e208m1/fesquare.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Combines the 8-word operand x13..x2 with itself and stores 8 words in out[0..7]. Same statement shape as femul in this directory, with both factors taken from the single operand. NOTE(review): appears machine-generated (commit message: "Update display logs and c files") — prefer regenerating over hand-editing.
+{ uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8)); // x15..x21: products of word sums minus the products of the first addends. The sums (x8 + x13 etc.) are formed in 64 bits before widening; assumes inputs are small enough not to wrap — bounds not visible here, TODO confirm.
+{ uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6))); // symmetric products are written out twice rather than doubled
+{ uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
+{ uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))));
+{ uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
+{ uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+{ uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
+{ uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15); // x22..x28: 128-bit column sums of plain word products plus the x15..x21 terms above
+{ uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
+{ uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
+{ uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
+{ uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
+{ uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
+{ uint64_t x29 = (uint64_t) (x25 >> 0x34); // from here on: split each sum at bit 52 (0x34) — high part is the carry, low 52 bits (mask 0xfffffffffffff) the word
+{ uint64_t x30 = ((uint64_t)x25 & 0xfffffffffffff);
+{ uint64_t x31 = (uint64_t) (x18 >> 0x34);
+{ uint64_t x32 = ((uint64_t)x18 & 0xfffffffffffff);
+{ uint128_t x33 = (((uint128_t)0x10000000000000 * x31) + x32); // re-joins x31 and x32, the two pieces of x18 split on the previous two lines (0x10000000000000 == 2^52)
+{ uint64_t x34 = (uint64_t) (x33 >> 0x34);
+{ uint64_t x35 = ((uint64_t)x33 & 0xfffffffffffff);
+{ uint128_t x36 = ((x29 + x24) + x34); // x34 is added into two different columns: here and into x39 below
+{ uint64_t x37 = (uint64_t) (x36 >> 0x34);
+{ uint64_t x38 = ((uint64_t)x36 & 0xfffffffffffff);
+{ uint128_t x39 = (x28 + x34);
+{ uint64_t x40 = (uint64_t) (x39 >> 0x34);
+{ uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffff);
+{ uint128_t x42 = (x37 + x23);
+{ uint64_t x43 = (uint64_t) (x42 >> 0x34);
+{ uint64_t x44 = ((uint64_t)x42 & 0xfffffffffffff);
+{ uint128_t x45 = (x40 + x27);
+{ uint64_t x46 = (uint64_t) (x45 >> 0x34);
+{ uint64_t x47 = ((uint64_t)x45 & 0xfffffffffffff);
+{ uint128_t x48 = (x43 + x22);
+{ uint64_t x49 = (uint64_t) (x48 >> 0x34);
+{ uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffff);
+{ uint128_t x51 = (x46 + x26);
+{ uint64_t x52 = (uint64_t) (x51 >> 0x34);
+{ uint64_t x53 = ((uint64_t)x51 & 0xfffffffffffff);
+{ uint64_t x54 = (x49 + x35); // remaining carries are handled in 64-bit arithmetic
+{ uint64_t x55 = (x54 >> 0x34);
+{ uint64_t x56 = (x54 & 0xfffffffffffff);
+{ uint64_t x57 = (x52 + x30);
+{ uint64_t x58 = (x57 >> 0x34);
+{ uint64_t x59 = (x57 & 0xfffffffffffff);
+{ uint64_t x60 = ((0x10000000000000 * x55) + x56); // re-joins x55 and x56, the two pieces of x54
+{ uint64_t x61 = (x60 >> 0x34);
+{ uint64_t x62 = (x60 & 0xfffffffffffff);
+{ uint64_t x63 = ((x58 + x38) + x61); // x61 is likewise added into two columns (x63 and x66)
+{ uint64_t x64 = (x63 >> 0x34);
+{ uint64_t x65 = (x63 & 0xfffffffffffff);
+{ uint64_t x66 = (x41 + x61);
+{ uint64_t x67 = (x66 >> 0x34);
+{ uint64_t x68 = (x66 & 0xfffffffffffff);
+out[0] = x62; // six of the eight outputs are 52-bit masked values
+out[1] = x50;
+out[2] = x64 + x44; // carry x64 added to masked word x44 without re-masking, so this word may exceed 52 bits
+out[3] = x65;
+out[4] = x59;
+out[5] = x53;
+out[6] = x67 + x47; // same pattern as out[2]: carry plus masked word, not re-masked
+out[7] = x68;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e416m2e208m1/fesquare.h b/src/Specific/solinas64_2e416m2e208m1/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e416m2e208m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: one 8-word operand passed word-by-word by value, result written through out (the matching .c stores out[0]..out[7]). NOTE(review): force_inline expands to __attribute__((always_inline)) on a declaration that is neither inline nor static — confirm the compiler accepts this without warnings.
diff --git a/src/Specific/solinas64_2e416m2e208m1/freeze.c b/src/Specific/solinas64_2e416m2e208m1/freeze.c
new file mode 100644
index 000000000..14a01d292
--- /dev/null
+++ b/src/Specific/solinas64_2e416m2e208m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C — there is no opening '{' and the statements are leaked generator text, so this file cannot compile as committed. TODO regenerate.
+out[0] = uint64_t x16; // a declaration on the right-hand side of an assignment: not C
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw 'Op Syntax.SubWithGetBorrow 52 ...' term printed verbatim; presumably a 52-bit subtract-with-borrow that had no C rendering — verify against the generator
+out[2] = x2; // x2 is the last parameter of freeze
+out[3] = 0xfffffffffffff;; // 0xfffffffffffff == 2^52 - 1; note the stray second ';'
+}
+// caller: uint64_t out[4]; NOTE(review): the 4 follows from the malformed body above; femul and fesquare in this same directory document out[8] — confirm the real size after regenerating.
diff --git a/src/Specific/solinas64_2e416m2e208m1/freeze.h b/src/Specific/solinas64_2e416m2e208m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e416m2e208m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: eight uint64_t operands by value plus an out pointer. NOTE(review): the matching freeze.c body is malformed generator output, so this declaration currently has no compilable definition.
diff --git a/src/Specific/solinas64_2e444m17/femul.c b/src/Specific/solinas64_2e444m17/femul.c
new file mode 100644
index 000000000..729012826
--- /dev/null
+++ b/src/Specific/solinas64_2e444m17/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Combines the 8-word operand x16..x5 with the 8-word operand x30..x19 and stores 8 words in out[0..7]. Straight-line code: every intermediate is a fresh variable in its own nested scope. NOTE(review): appears machine-generated (commit message: "Update display logs and c files") — prefer regenerating over hand-editing.
+{ uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19)))))))); // x32..x39: eight 128-bit column sums of word products; x32 is the only one with no 0x11 term
+{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x11 * (0x2 * ((uint128_t)x16 * x30)))); // 0x2 factors: presumably compensate for the alternating 56/55-bit word widths used below — TODO confirm
+{ uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x11 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31)))); // 0x11 (17) factors: presumably fold wrapped products back in using 2^444 == 17 for the modulus named by the directory (2e444m17) — TODO confirm
+{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x11 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29))))));
+{ uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x11 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x11 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25))))))));
+{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x11 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x11 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21))))))))));
+{ uint128_t x40 = (x39 >> 0x38); // carry chain x39 -> x38 -> ... -> x32; the split point alternates between bit 56 (0x38) and bit 55 (0x37)
+{ uint64_t x41 = ((uint64_t)x39 & 0xffffffffffffff);
+{ uint128_t x42 = (x40 + x38);
+{ uint128_t x43 = (x42 >> 0x37);
+{ uint64_t x44 = ((uint64_t)x42 & 0x7fffffffffffff);
+{ uint128_t x45 = (x43 + x37);
+{ uint128_t x46 = (x45 >> 0x38);
+{ uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
+{ uint128_t x48 = (x46 + x36);
+{ uint128_t x49 = (x48 >> 0x37);
+{ uint64_t x50 = ((uint64_t)x48 & 0x7fffffffffffff);
+{ uint128_t x51 = (x49 + x35);
+{ uint128_t x52 = (x51 >> 0x38);
+{ uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
+{ uint128_t x54 = (x52 + x34);
+{ uint128_t x55 = (x54 >> 0x37);
+{ uint64_t x56 = ((uint64_t)x54 & 0x7fffffffffffff);
+{ uint128_t x57 = (x55 + x33);
+{ uint64_t x58 = (uint64_t) (x57 >> 0x38); // from here the carries are narrowed to uint64_t
+{ uint64_t x59 = ((uint64_t)x57 & 0xffffffffffffff);
+{ uint128_t x60 = (x58 + x32);
+{ uint64_t x61 = (uint64_t) (x60 >> 0x37);
+{ uint64_t x62 = ((uint64_t)x60 & 0x7fffffffffffff);
+{ uint128_t x63 = (x41 + ((uint128_t)0x11 * x61)); // the carry out of the last column is multiplied by 0x11 and added back to the first word x41
+{ uint64_t x64 = (uint64_t) (x63 >> 0x38);
+{ uint64_t x65 = ((uint64_t)x63 & 0xffffffffffffff);
+{ uint64_t x66 = (x64 + x44);
+{ uint64_t x67 = (x66 >> 0x37);
+{ uint64_t x68 = (x66 & 0x7fffffffffffff);
+out[0] = x62; // out[0] is the word where the carry chain ends; out[7] the word where it starts
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // carry x67 added to masked word x47 without re-masking, so this word may exceed 56 bits
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e444m17/femul.h b/src/Specific/solinas64_2e444m17/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e444m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype: two 8-word operands passed word-by-word by value, result written through out (the matching .c stores out[0]..out[7]). NOTE(review): force_inline expands to __attribute__((always_inline)) on a declaration that is neither inline nor static — confirm the compiler accepts this without warnings.
diff --git a/src/Specific/solinas64_2e444m17/fesquare.c b/src/Specific/solinas64_2e444m17/fesquare.c
new file mode 100644
index 000000000..0633a574b
--- /dev/null
+++ b/src/Specific/solinas64_2e444m17/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Combines the 8-word operand x13..x2 with itself and stores 8 words in out[0..7]. Same statement shape as femul in this directory, with both factors taken from the single operand. NOTE(review): appears machine-generated (commit message: "Update display logs and c files") — prefer regenerating over hand-editing.
+{ uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2)))))))); // x15..x22: eight 128-bit column sums of word products; symmetric products are written out twice rather than doubled
+{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (0x2 * ((uint128_t)x13 * x13)))); // 0x2 factors: presumably compensate for the alternating 56/55-bit word widths used below — TODO confirm
+{ uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14)))); // 0x11 (17) factors: presumably fold wrapped products back in using 2^444 == 17 for the modulus named by the directory (2e444m17) — TODO confirm
+{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12))))));
+{ uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8))))))));
+{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4))))))))));
+{ uint128_t x23 = (x22 >> 0x38); // carry chain x22 -> x21 -> ... -> x15; the split point alternates between bit 56 (0x38) and bit 55 (0x37)
+{ uint64_t x24 = ((uint64_t)x22 & 0xffffffffffffff);
+{ uint128_t x25 = (x23 + x21);
+{ uint128_t x26 = (x25 >> 0x37);
+{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
+{ uint128_t x28 = (x26 + x20);
+{ uint128_t x29 = (x28 >> 0x38);
+{ uint64_t x30 = ((uint64_t)x28 & 0xffffffffffffff);
+{ uint128_t x31 = (x29 + x19);
+{ uint128_t x32 = (x31 >> 0x37);
+{ uint64_t x33 = ((uint64_t)x31 & 0x7fffffffffffff);
+{ uint128_t x34 = (x32 + x18);
+{ uint128_t x35 = (x34 >> 0x38);
+{ uint64_t x36 = ((uint64_t)x34 & 0xffffffffffffff);
+{ uint128_t x37 = (x35 + x17);
+{ uint128_t x38 = (x37 >> 0x37);
+{ uint64_t x39 = ((uint64_t)x37 & 0x7fffffffffffff);
+{ uint128_t x40 = (x38 + x16);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x38); // from here the carries are narrowed to uint64_t
+{ uint64_t x42 = ((uint64_t)x40 & 0xffffffffffffff);
+{ uint128_t x43 = (x41 + x15);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x37);
+{ uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
+{ uint128_t x46 = (x24 + ((uint128_t)0x11 * x44)); // the carry out of the last column is multiplied by 0x11 and added back to the first word x24
+{ uint64_t x47 = (uint64_t) (x46 >> 0x38);
+{ uint64_t x48 = ((uint64_t)x46 & 0xffffffffffffff);
+{ uint64_t x49 = (x47 + x27);
+{ uint64_t x50 = (x49 >> 0x37);
+{ uint64_t x51 = (x49 & 0x7fffffffffffff);
+out[0] = x45; // out[0] is the word where the carry chain ends; out[7] the word where it starts
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // carry x50 added to masked word x30 without re-masking, so this word may exceed 56 bits
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e444m17/fesquare.h b/src/Specific/solinas64_2e444m17/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e444m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: one 8-word operand passed word-by-word by value, result written through out (the matching .c stores out[0]..out[7]). NOTE(review): force_inline expands to __attribute__((always_inline)) on a declaration that is neither inline nor static — confirm the compiler accepts this without warnings.
diff --git a/src/Specific/solinas64_2e444m17/freeze.c b/src/Specific/solinas64_2e444m17/freeze.c
new file mode 100644
index 000000000..879618713
--- /dev/null
+++ b/src/Specific/solinas64_2e444m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C — there is no opening '{' and the statements are leaked generator text, so this file cannot compile as committed. TODO regenerate.
+out[0] = uint64_t x16; // a declaration on the right-hand side of an assignment: not C
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // raw 'Op Syntax.SubWithGetBorrow 56 ...' term printed verbatim; presumably a 56-bit subtract-with-borrow that had no C rendering — verify against the generator
+out[2] = x2; // x2 is the last parameter of freeze
+out[3] = 0xffffffffffffef;; // 0xffffffffffffef == 2^56 - 17; note the stray second ';'
+}
+// caller: uint64_t out[4]; NOTE(review): the 4 follows from the malformed body above; femul and fesquare in this same directory document out[8] — confirm the real size after regenerating.
diff --git a/src/Specific/solinas64_2e444m17/freeze.h b/src/Specific/solinas64_2e444m17/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e444m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype: eight uint64_t operands by value plus an out pointer. NOTE(review): the matching freeze.c body is malformed generator output, so this declaration currently has no compilable definition.
diff --git a/src/Specific/solinas64_2e448m2e224m1/femul.c b/src/Specific/solinas64_2e448m2e224m1/femul.c
new file mode 100644
index 000000000..698c9e8ff
--- /dev/null
+++ b/src/Specific/solinas64_2e448m2e224m1/femul.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Combines the 8-word operand x16..x5 with the 8-word operand x30..x19 and stores 8 words in out[0..7]. Straight-line code: every intermediate is a fresh variable in its own nested scope. NOTE(review): appears machine-generated (commit message: "Update display logs and c files") — prefer regenerating over hand-editing.
+{ uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25)); // x32..x38: products of word sums minus the products of the first addends. The sums (x11 + x16 etc.) are formed in 64 bits before widening; assumes inputs are small enough not to wrap — bounds not visible here, TODO confirm.
+{ uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
+{ uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
+{ uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))));
+{ uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
+{ uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
+{ uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
+{ uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32); // x39..x45: 128-bit column sums of plain word products plus the x32..x38 terms above
+{ uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
+{ uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
+{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
+{ uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
+{ uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
+{ uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
+{ uint64_t x46 = (uint64_t) (x42 >> 0x38); // from here on: split each sum at bit 56 (0x38) — high part is the carry, low 56 bits (mask 0xffffffffffffff) the word
+{ uint64_t x47 = ((uint64_t)x42 & 0xffffffffffffff);
+{ uint64_t x48 = (uint64_t) (x35 >> 0x38);
+{ uint64_t x49 = ((uint64_t)x35 & 0xffffffffffffff);
+{ uint128_t x50 = (((uint128_t)0x100000000000000 * x48) + x49); // re-joins x48 and x49, the two pieces of x35 split on the previous two lines (0x100000000000000 == 2^56)
+{ uint64_t x51 = (uint64_t) (x50 >> 0x38);
+{ uint64_t x52 = ((uint64_t)x50 & 0xffffffffffffff);
+{ uint128_t x53 = ((x46 + x41) + x51); // x51 is added into two different columns: here and into x56 below
+{ uint64_t x54 = (uint64_t) (x53 >> 0x38);
+{ uint64_t x55 = ((uint64_t)x53 & 0xffffffffffffff);
+{ uint128_t x56 = (x45 + x51);
+{ uint64_t x57 = (uint64_t) (x56 >> 0x38);
+{ uint64_t x58 = ((uint64_t)x56 & 0xffffffffffffff);
+{ uint128_t x59 = (x54 + x40);
+{ uint64_t x60 = (uint64_t) (x59 >> 0x38);
+{ uint64_t x61 = ((uint64_t)x59 & 0xffffffffffffff);
+{ uint128_t x62 = (x57 + x44);
+{ uint64_t x63 = (uint64_t) (x62 >> 0x38);
+{ uint64_t x64 = ((uint64_t)x62 & 0xffffffffffffff);
+{ uint128_t x65 = (x60 + x39);
+{ uint64_t x66 = (uint64_t) (x65 >> 0x38);
+{ uint64_t x67 = ((uint64_t)x65 & 0xffffffffffffff);
+{ uint128_t x68 = (x63 + x43);
+{ uint64_t x69 = (uint64_t) (x68 >> 0x38);
+{ uint64_t x70 = ((uint64_t)x68 & 0xffffffffffffff);
+{ uint64_t x71 = (x66 + x52); // remaining carries are handled in 64-bit arithmetic
+{ uint64_t x72 = (x71 >> 0x38);
+{ uint64_t x73 = (x71 & 0xffffffffffffff);
+{ uint64_t x74 = (x69 + x47);
+{ uint64_t x75 = (x74 >> 0x38);
+{ uint64_t x76 = (x74 & 0xffffffffffffff);
+{ uint64_t x77 = ((0x100000000000000 * x72) + x73); // re-joins x72 and x73, the two pieces of x71
+{ uint64_t x78 = (x77 >> 0x38);
+{ uint64_t x79 = (x77 & 0xffffffffffffff);
+{ uint64_t x80 = ((x75 + x55) + x78); // x78 is likewise added into two columns (x80 and x83)
+{ uint64_t x81 = (x80 >> 0x38);
+{ uint64_t x82 = (x80 & 0xffffffffffffff);
+{ uint64_t x83 = (x58 + x78);
+{ uint64_t x84 = (x83 >> 0x38);
+{ uint64_t x85 = (x83 & 0xffffffffffffff);
+out[0] = x79; // six of the eight outputs are 56-bit masked values
+out[1] = x67;
+out[2] = x81 + x61; // carry x81 added to masked word x61 without re-masking, so this word may exceed 56 bits
+out[3] = x82;
+out[4] = x76;
+out[5] = x70;
+out[6] = x84 + x64; // same pattern as out[2]: carry plus masked word, not re-masked
+out[7] = x85;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e448m2e224m1/femul.h b/src/Specific/solinas64_2e448m2e224m1/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e448m2e224m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // Prototype: two 8-word operands passed word-by-word by value, result written through out (the matching .c stores out[0]..out[7]). NOTE(review): force_inline expands to __attribute__((always_inline)) on a declaration that is neither inline nor static — confirm the compiler accepts this without warnings.
diff --git a/src/Specific/solinas64_2e448m2e224m1/fesquare.c b/src/Specific/solinas64_2e448m2e224m1/fesquare.c
new file mode 100644
index 000000000..0dfda7f19
--- /dev/null
+++ b/src/Specific/solinas64_2e448m2e224m1/fesquare.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Multiplies the 8-limb input by itself and stores 8 result limbs in out[0..7]; presumably arithmetic modulo 2^448 - 2^224 - 1, going by the directory name - confirm.
+{ uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8)); // x15..x21: products of the limb sums (x8+x13, x6+x14, x4+x12, x2+x10) minus the same products of x8, x6, x4, x2 alone
+{ uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
+{ uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
+{ uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))));
+{ uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
+{ uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+{ uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
+{ uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15); // x22..x28: sums of the products of the individual limbs; all except x25 also add terms from x15..x21
+{ uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
+{ uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
+{ uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
+{ uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
+{ uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
+{ uint64_t x29 = (uint64_t) (x25 >> 0x38); // from here on: carry propagation - each value is split into its low 56 bits (mask 0xffffffffffffff) and the bits above them (>> 0x38)
+{ uint64_t x30 = ((uint64_t)x25 & 0xffffffffffffff);
+{ uint64_t x31 = (uint64_t) (x18 >> 0x38);
+{ uint64_t x32 = ((uint64_t)x18 & 0xffffffffffffff);
+{ uint128_t x33 = (((uint128_t)0x100000000000000 * x31) + x32);
+{ uint64_t x34 = (uint64_t) (x33 >> 0x38); // x34 is added into two different sums below (x36 and x39)
+{ uint64_t x35 = ((uint64_t)x33 & 0xffffffffffffff);
+{ uint128_t x36 = ((x29 + x24) + x34);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x38);
+{ uint64_t x38 = ((uint64_t)x36 & 0xffffffffffffff);
+{ uint128_t x39 = (x28 + x34);
+{ uint64_t x40 = (uint64_t) (x39 >> 0x38);
+{ uint64_t x41 = ((uint64_t)x39 & 0xffffffffffffff);
+{ uint128_t x42 = (x37 + x23);
+{ uint64_t x43 = (uint64_t) (x42 >> 0x38);
+{ uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
+{ uint128_t x45 = (x40 + x27);
+{ uint64_t x46 = (uint64_t) (x45 >> 0x38);
+{ uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
+{ uint128_t x48 = (x43 + x22);
+{ uint64_t x49 = (uint64_t) (x48 >> 0x38);
+{ uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
+{ uint128_t x51 = (x46 + x26);
+{ uint64_t x52 = (uint64_t) (x51 >> 0x38);
+{ uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
+{ uint64_t x54 = (x49 + x35);
+{ uint64_t x55 = (x54 >> 0x38);
+{ uint64_t x56 = (x54 & 0xffffffffffffff);
+{ uint64_t x57 = (x52 + x30);
+{ uint64_t x58 = (x57 >> 0x38);
+{ uint64_t x59 = (x57 & 0xffffffffffffff);
+{ uint64_t x60 = ((0x100000000000000 * x55) + x56);
+{ uint64_t x61 = (x60 >> 0x38); // like x34, x61 is added into two sums (x63 and x66)
+{ uint64_t x62 = (x60 & 0xffffffffffffff);
+{ uint64_t x63 = ((x58 + x38) + x61);
+{ uint64_t x64 = (x63 >> 0x38);
+{ uint64_t x65 = (x63 & 0xffffffffffffff);
+{ uint64_t x66 = (x41 + x61);
+{ uint64_t x67 = (x66 >> 0x38);
+{ uint64_t x68 = (x66 & 0xffffffffffffff);
+out[0] = x62;
+out[1] = x50;
+out[2] = x64 + x44; // the last carry x64 is added in unmasked, so out[2] may exceed 56 bits
+out[3] = x65;
+out[4] = x59;
+out[5] = x53;
+out[6] = x67 + x47; // likewise for the last carry x67
+out[7] = x68; // x68 derives from x28 (which holds x2*x2), so out[7] presumably is the least significant limb - confirm
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (the caller provides the 8-element array that receives the result limbs)
diff --git a/src/Specific/solinas64_2e448m2e224m1/fesquare.h b/src/Specific/solinas64_2e448m2e224m1/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e448m2e224m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // takes the 8 input limbs as separate arguments and writes the result limbs through out (fesquare.c notes "caller: uint64_t out[8]")
diff --git a/src/Specific/solinas64_2e448m2e224m1/freeze.c b/src/Specific/solinas64_2e448m2e224m1/freeze.c
new file mode 100644
index 000000000..ffc6a3184
--- /dev/null
+++ b/src/Specific/solinas64_2e448m2e224m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature and the lines below are not valid C; this looks like unfinished code-generator output, so this file cannot compile as-is
+out[0] = uint64_t x16; // NOTE(review): a declaration on the right-hand side of an assignment; x16 is never given a value
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 56 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C; presumably a subtract-with-borrow operation the generator failed to print - confirm upstream
+out[2] = x2;
+out[3] = 0xffffffffffffff;; // low 56 bits set; the second ';' is a stray empty statement
+}
+// caller: uint64_t out[4]; NOTE(review): femul.c and fesquare.c in this directory state out[8] - confirm the intended size
diff --git a/src/Specific/solinas64_2e448m2e224m1/freeze.h b/src/Specific/solinas64_2e448m2e224m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e448m2e224m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // NOTE(review): the freeze.c added alongside this header does not contain valid C, so this declaration currently has no usable definition
diff --git a/src/Specific/solinas64_2e450m2e225m1/femul.c b/src/Specific/solinas64_2e450m2e225m1/femul.c
new file mode 100644
index 000000000..43d167528
--- /dev/null
+++ b/src/Specific/solinas64_2e450m2e225m1/femul.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Multiplies the limbs x16..x5 by the limbs x30..x19 and stores 8 result limbs in out[0..7]; presumably arithmetic modulo 2^450 - 2^225 - 1, going by the directory name - confirm.
+{ uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25)); // x32..x38: products of the limb sums (x5+x13, x7+x15, x9+x17, x11+x16) with (x19+x27, x21+x29, x23+x31, x25+x30), minus the same products of x5, x7, x9, x11 with x19, x21, x23, x25
+{ uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
+{ uint128_t x34 = (((0x2 * ((uint128_t)(x7 + x15) * (x25 + x30))) + ((0x2 * ((uint128_t)(x9 + x17) * (x23 + x31))) + (0x2 * ((uint128_t)(x11 + x16) * (x21 + x29))))) - ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + (0x2 * ((uint128_t)x11 * x21)))));
+{ uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + ((0x2 * ((uint128_t)(x7 + x15) * (x23 + x31))) + ((0x2 * ((uint128_t)(x9 + x17) * (x21 + x29))) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))));
+{ uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + ((0x2 * ((uint128_t)(x7 + x15) * (x21 + x29))) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))));
+{ uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
+{ uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
+{ uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32); // x39..x45: sums of the products of the individual limbs; all except x42 also add terms from x32..x38
+{ uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
+{ uint128_t x41 = (((((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + (0x2 * ((uint128_t)x11 * x21)))) + ((0x2 * ((uint128_t)x15 * x30)) + ((0x2 * ((uint128_t)x17 * x31)) + (0x2 * ((uint128_t)x16 * x29))))) + x38) + x34);
+{ uint128_t x42 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((0x2 * ((uint128_t)x9 * x21)) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((uint128_t)x16 * x27)))));
+{ uint128_t x43 = (((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + ((uint128_t)x17 * x27)))) + x32);
+{ uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
+{ uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
+{ uint64_t x46 = (uint64_t) (x42 >> 0x38); // from here on: carry propagation - values are split at bit 56 (>> 0x38, mask 0xffffffffffffff) or at bit 57 (>> 0x39, mask 0x1ffffffffffffff)
+{ uint64_t x47 = ((uint64_t)x42 & 0xffffffffffffff);
+{ uint128_t x48 = (x35 >> 0x38);
+{ uint64_t x49 = ((uint64_t)x35 & 0xffffffffffffff);
+{ uint128_t x50 = ((0x100000000000000 * x48) + x49);
+{ uint128_t x51 = (x50 >> 0x38); // x51 is added into two different sums below (x53 and x56)
+{ uint64_t x52 = ((uint64_t)x50 & 0xffffffffffffff);
+{ uint128_t x53 = ((x46 + x41) + x51);
+{ uint64_t x54 = (uint64_t) (x53 >> 0x39);
+{ uint64_t x55 = ((uint64_t)x53 & 0x1ffffffffffffff);
+{ uint128_t x56 = (x45 + x51);
+{ uint64_t x57 = (uint64_t) (x56 >> 0x39);
+{ uint64_t x58 = ((uint64_t)x56 & 0x1ffffffffffffff);
+{ uint128_t x59 = (x54 + x40);
+{ uint64_t x60 = (uint64_t) (x59 >> 0x38);
+{ uint64_t x61 = ((uint64_t)x59 & 0xffffffffffffff);
+{ uint128_t x62 = (x57 + x44);
+{ uint64_t x63 = (uint64_t) (x62 >> 0x38);
+{ uint64_t x64 = ((uint64_t)x62 & 0xffffffffffffff);
+{ uint128_t x65 = (x60 + x39);
+{ uint128_t x66 = (x65 >> 0x38);
+{ uint64_t x67 = ((uint64_t)x65 & 0xffffffffffffff);
+{ uint128_t x68 = (x63 + x43);
+{ uint64_t x69 = (uint64_t) (x68 >> 0x38);
+{ uint64_t x70 = ((uint64_t)x68 & 0xffffffffffffff);
+{ uint128_t x71 = (x66 + x52);
+{ uint64_t x72 = (uint64_t) (x71 >> 0x38);
+{ uint64_t x73 = ((uint64_t)x71 & 0xffffffffffffff);
+{ uint64_t x74 = (x69 + x47);
+{ uint64_t x75 = (x74 >> 0x38);
+{ uint64_t x76 = (x74 & 0xffffffffffffff);
+{ uint128_t x77 = (((uint128_t)0x100000000000000 * x72) + x73);
+{ uint64_t x78 = (uint64_t) (x77 >> 0x38); // like x51, x78 is added into two sums (x80 and x83)
+{ uint64_t x79 = ((uint64_t)x77 & 0xffffffffffffff);
+{ uint64_t x80 = ((x75 + x55) + x78);
+{ uint64_t x81 = (x80 >> 0x39);
+{ uint64_t x82 = (x80 & 0x1ffffffffffffff);
+{ uint64_t x83 = (x58 + x78);
+{ uint64_t x84 = (x83 >> 0x39);
+{ uint64_t x85 = (x83 & 0x1ffffffffffffff);
+out[0] = x79;
+out[1] = x67;
+out[2] = x81 + x61; // the last carry x81 is added in unmasked, so out[2] may exceed 56 bits
+out[3] = x82;
+out[4] = x76;
+out[5] = x70;
+out[6] = x84 + x64; // likewise for the last carry x84
+out[7] = x85; // x85 derives from x45 (which holds x5*x19), so out[7] presumably is the least significant limb - confirm
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (the caller provides the 8-element array that receives the result limbs)
diff --git a/src/Specific/solinas64_2e450m2e225m1/femul.h b/src/Specific/solinas64_2e450m2e225m1/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e450m2e225m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // femul.c multiplies the limbs x16..x5 by the limbs x30..x19 and writes the result limbs through out ("caller: uint64_t out[8]")
diff --git a/src/Specific/solinas64_2e450m2e225m1/fesquare.c b/src/Specific/solinas64_2e450m2e225m1/fesquare.c
new file mode 100644
index 000000000..f83caa2bf
--- /dev/null
+++ b/src/Specific/solinas64_2e450m2e225m1/fesquare.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Multiplies the 8-limb input by itself and stores 8 result limbs in out[0..7]; presumably arithmetic modulo 2^450 - 2^225 - 1, going by the directory name - confirm.
+{ uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8)); // x15..x21: products of the limb sums (x8+x13, x6+x14, x4+x12, x2+x10) minus the same products of x8, x6, x4, x2 alone; some terms carry a factor 0x2
+{ uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
+{ uint128_t x17 = (((0x2 * ((uint128_t)(x4 + x12) * (x8 + x13))) + ((0x2 * ((uint128_t)(x6 + x14) * (x6 + x14))) + (0x2 * ((uint128_t)(x8 + x13) * (x4 + x12))))) - ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x8 * x4)))));
+{ uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + ((0x2 * ((uint128_t)(x4 + x12) * (x6 + x14))) + ((0x2 * ((uint128_t)(x6 + x14) * (x4 + x12))) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))));
+{ uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + ((0x2 * ((uint128_t)(x4 + x12) * (x4 + x12))) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))));
+{ uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+{ uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
+{ uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15); // x22..x28: sums of the products of the individual limbs; all except x25 also add terms from x15..x21
+{ uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
+{ uint128_t x24 = (((((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + (0x2 * ((uint128_t)x8 * x4)))) + ((0x2 * ((uint128_t)x12 * x13)) + ((0x2 * ((uint128_t)x14 * x14)) + (0x2 * ((uint128_t)x13 * x12))))) + x21) + x17);
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + ((0x2 * ((uint128_t)x12 * x14)) + ((0x2 * ((uint128_t)x14 * x12)) + ((uint128_t)x13 * x10)))));
+{ uint128_t x26 = (((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + ((uint128_t)x14 * x10)))) + x15);
+{ uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
+{ uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
+{ uint64_t x29 = (uint64_t) (x25 >> 0x38); // from here on: carry propagation - values are split at bit 56 (>> 0x38, mask 0xffffffffffffff) or at bit 57 (>> 0x39, mask 0x1ffffffffffffff)
+{ uint64_t x30 = ((uint64_t)x25 & 0xffffffffffffff);
+{ uint128_t x31 = (x18 >> 0x38);
+{ uint64_t x32 = ((uint64_t)x18 & 0xffffffffffffff);
+{ uint128_t x33 = ((0x100000000000000 * x31) + x32);
+{ uint128_t x34 = (x33 >> 0x38); // x34 is added into two different sums below (x36 and x39)
+{ uint64_t x35 = ((uint64_t)x33 & 0xffffffffffffff);
+{ uint128_t x36 = ((x29 + x24) + x34);
+{ uint64_t x37 = (uint64_t) (x36 >> 0x39);
+{ uint64_t x38 = ((uint64_t)x36 & 0x1ffffffffffffff);
+{ uint128_t x39 = (x28 + x34);
+{ uint64_t x40 = (uint64_t) (x39 >> 0x39);
+{ uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
+{ uint128_t x42 = (x37 + x23);
+{ uint64_t x43 = (uint64_t) (x42 >> 0x38);
+{ uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
+{ uint128_t x45 = (x40 + x27);
+{ uint64_t x46 = (uint64_t) (x45 >> 0x38);
+{ uint64_t x47 = ((uint64_t)x45 & 0xffffffffffffff);
+{ uint128_t x48 = (x43 + x22);
+{ uint128_t x49 = (x48 >> 0x38);
+{ uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
+{ uint128_t x51 = (x46 + x26);
+{ uint64_t x52 = (uint64_t) (x51 >> 0x38);
+{ uint64_t x53 = ((uint64_t)x51 & 0xffffffffffffff);
+{ uint128_t x54 = (x49 + x35);
+{ uint64_t x55 = (uint64_t) (x54 >> 0x38);
+{ uint64_t x56 = ((uint64_t)x54 & 0xffffffffffffff);
+{ uint64_t x57 = (x52 + x30);
+{ uint64_t x58 = (x57 >> 0x38);
+{ uint64_t x59 = (x57 & 0xffffffffffffff);
+{ uint128_t x60 = (((uint128_t)0x100000000000000 * x55) + x56);
+{ uint64_t x61 = (uint64_t) (x60 >> 0x38); // like x34, x61 is added into two sums (x63 and x66)
+{ uint64_t x62 = ((uint64_t)x60 & 0xffffffffffffff);
+{ uint64_t x63 = ((x58 + x38) + x61);
+{ uint64_t x64 = (x63 >> 0x39);
+{ uint64_t x65 = (x63 & 0x1ffffffffffffff);
+{ uint64_t x66 = (x41 + x61);
+{ uint64_t x67 = (x66 >> 0x39);
+{ uint64_t x68 = (x66 & 0x1ffffffffffffff);
+out[0] = x62;
+out[1] = x50;
+out[2] = x64 + x44; // the last carry x64 is added in unmasked, so out[2] may exceed 56 bits
+out[3] = x65;
+out[4] = x59;
+out[5] = x53;
+out[6] = x67 + x47; // likewise for the last carry x67
+out[7] = x68; // x68 derives from x28 (which holds x2*x2), so out[7] presumably is the least significant limb - confirm
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (the caller provides the 8-element array that receives the result limbs)
diff --git a/src/Specific/solinas64_2e450m2e225m1/fesquare.h b/src/Specific/solinas64_2e450m2e225m1/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e450m2e225m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // takes the 8 input limbs as separate arguments and writes the result limbs through out (fesquare.c notes "caller: uint64_t out[8]")
diff --git a/src/Specific/solinas64_2e450m2e225m1/freeze.c b/src/Specific/solinas64_2e450m2e225m1/freeze.c
new file mode 100644
index 000000000..ab3776151
--- /dev/null
+++ b/src/Specific/solinas64_2e450m2e225m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature and the lines below are not valid C; this looks like unfinished code-generator output, so this file cannot compile as-is
+out[0] = uint64_t x16; // NOTE(review): a declaration on the right-hand side of an assignment; x16 is never given a value
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C; presumably a subtract-with-borrow operation the generator failed to print - confirm upstream
+out[2] = x2;
+out[3] = 0x1ffffffffffffff;; // low 57 bits set; the second ';' is a stray empty statement
+}
+// caller: uint64_t out[4]; NOTE(review): femul.c and fesquare.c in this directory state out[8] - confirm the intended size
diff --git a/src/Specific/solinas64_2e450m2e225m1/freeze.h b/src/Specific/solinas64_2e450m2e225m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e450m2e225m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // NOTE(review): the freeze.c added alongside this header does not contain valid C, so this declaration currently has no usable definition
diff --git a/src/Specific/solinas64_2e452m3/femul.c b/src/Specific/solinas64_2e452m3/femul.c
new file mode 100644
index 000000000..ac501cb79
--- /dev/null
+++ b/src/Specific/solinas64_2e452m3/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Multiplies the limbs x16..x5 by the limbs x30..x19 and stores 8 result limbs in out[0..7]; presumably arithmetic modulo 2^452 - 3, going by the directory name - confirm.
+{ uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19)))))))); // x32..x39: 128-bit sums of limb products; x33..x39 each add a group of products scaled by 0x3 (presumably the reduction for 2^452 - 3 - confirm)
+{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x3 * (0x2 * ((uint128_t)x16 * x30))));
+{ uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x3 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))));
+{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x3 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29))))));
+{ uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x3 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27))))));
+{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x3 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25))))))));
+{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x3 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23))))))));
+{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x3 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21))))))))));
+{ uint128_t x40 = (x39 >> 0x39); // from here on: carry chain from x39 up to x32, splitting alternately at bit 57 (>> 0x39, mask 0x1ffffffffffffff) and bit 56 (>> 0x38, mask 0xffffffffffffff)
+{ uint64_t x41 = ((uint64_t)x39 & 0x1ffffffffffffff);
+{ uint128_t x42 = (x40 + x38);
+{ uint128_t x43 = (x42 >> 0x38);
+{ uint64_t x44 = ((uint64_t)x42 & 0xffffffffffffff);
+{ uint128_t x45 = (x43 + x37);
+{ uint128_t x46 = (x45 >> 0x39);
+{ uint64_t x47 = ((uint64_t)x45 & 0x1ffffffffffffff);
+{ uint128_t x48 = (x46 + x36);
+{ uint128_t x49 = (x48 >> 0x38);
+{ uint64_t x50 = ((uint64_t)x48 & 0xffffffffffffff);
+{ uint128_t x51 = (x49 + x35);
+{ uint64_t x52 = (uint64_t) (x51 >> 0x39);
+{ uint64_t x53 = ((uint64_t)x51 & 0x1ffffffffffffff);
+{ uint128_t x54 = (x52 + x34);
+{ uint128_t x55 = (x54 >> 0x38);
+{ uint64_t x56 = ((uint64_t)x54 & 0xffffffffffffff);
+{ uint128_t x57 = (x55 + x33);
+{ uint64_t x58 = (uint64_t) (x57 >> 0x39);
+{ uint64_t x59 = ((uint64_t)x57 & 0x1ffffffffffffff);
+{ uint128_t x60 = (x58 + x32);
+{ uint64_t x61 = (uint64_t) (x60 >> 0x38);
+{ uint64_t x62 = ((uint64_t)x60 & 0xffffffffffffff);
+{ uint128_t x63 = (x41 + ((uint128_t)0x3 * x61)); // the last carry of the chain (x61) is scaled by 0x3 and added to x41, the low part of x39
+{ uint64_t x64 = (uint64_t) (x63 >> 0x39);
+{ uint64_t x65 = ((uint64_t)x63 & 0x1ffffffffffffff);
+{ uint64_t x66 = (x64 + x44);
+{ uint64_t x67 = (x66 >> 0x38);
+{ uint64_t x68 = (x66 & 0xffffffffffffff);
+out[0] = x62;
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // the last carry x67 is added in unmasked, so out[5] may exceed 57 bits
+out[6] = x68;
+out[7] = x65; // x65 derives from x39 (which holds x5*x19), so out[7] presumably is the least significant limb - confirm
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (the caller provides the 8-element array that receives the result limbs)
diff --git a/src/Specific/solinas64_2e452m3/femul.h b/src/Specific/solinas64_2e452m3/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e452m3/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // femul.c multiplies the limbs x16..x5 by the limbs x30..x19 and writes the result limbs through out ("caller: uint64_t out[8]")
diff --git a/src/Specific/solinas64_2e452m3/fesquare.c b/src/Specific/solinas64_2e452m3/fesquare.c
new file mode 100644
index 000000000..7b57e86be
--- /dev/null
+++ b/src/Specific/solinas64_2e452m3/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Multiplies the 8-limb input by itself and stores 8 result limbs in out[0..7]; presumably arithmetic modulo 2^452 - 3, going by the directory name - confirm.
+{ uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2)))))))); // x15..x22: 128-bit sums of limb products; x16..x22 each add a group of products scaled by 0x3 (presumably the reduction for 2^452 - 3 - confirm)
+{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x3 * (0x2 * ((uint128_t)x13 * x13))));
+{ uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x3 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))));
+{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x3 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12))))));
+{ uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x3 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10))))));
+{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x3 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8))))))));
+{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x3 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6))))))));
+{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x3 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4))))))))));
+{ uint128_t x23 = (x22 >> 0x39); // from here on: carry chain from x22 up to x15, splitting alternately at bit 57 (>> 0x39, mask 0x1ffffffffffffff) and bit 56 (>> 0x38, mask 0xffffffffffffff)
+{ uint64_t x24 = ((uint64_t)x22 & 0x1ffffffffffffff);
+{ uint128_t x25 = (x23 + x21);
+{ uint128_t x26 = (x25 >> 0x38);
+{ uint64_t x27 = ((uint64_t)x25 & 0xffffffffffffff);
+{ uint128_t x28 = (x26 + x20);
+{ uint128_t x29 = (x28 >> 0x39);
+{ uint64_t x30 = ((uint64_t)x28 & 0x1ffffffffffffff);
+{ uint128_t x31 = (x29 + x19);
+{ uint128_t x32 = (x31 >> 0x38);
+{ uint64_t x33 = ((uint64_t)x31 & 0xffffffffffffff);
+{ uint128_t x34 = (x32 + x18);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x39);
+{ uint64_t x36 = ((uint64_t)x34 & 0x1ffffffffffffff);
+{ uint128_t x37 = (x35 + x17);
+{ uint128_t x38 = (x37 >> 0x38);
+{ uint64_t x39 = ((uint64_t)x37 & 0xffffffffffffff);
+{ uint128_t x40 = (x38 + x16);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x39);
+{ uint64_t x42 = ((uint64_t)x40 & 0x1ffffffffffffff);
+{ uint128_t x43 = (x41 + x15);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x38);
+{ uint64_t x45 = ((uint64_t)x43 & 0xffffffffffffff);
+{ uint128_t x46 = (x24 + ((uint128_t)0x3 * x44)); // the last carry of the chain (x44) is scaled by 0x3 and added to x24, the low part of x22
+{ uint64_t x47 = (uint64_t) (x46 >> 0x39);
+{ uint64_t x48 = ((uint64_t)x46 & 0x1ffffffffffffff);
+{ uint64_t x49 = (x47 + x27);
+{ uint64_t x50 = (x49 >> 0x38);
+{ uint64_t x51 = (x49 & 0xffffffffffffff);
+out[0] = x45;
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // the last carry x50 is added in unmasked, so out[5] may exceed 57 bits
+out[6] = x51;
+out[7] = x48; // x48 derives from x22 (which holds x2*x2), so out[7] presumably is the least significant limb - confirm
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8]; (the caller provides the 8-element array that receives the result limbs)
diff --git a/src/Specific/solinas64_2e452m3/fesquare.h b/src/Specific/solinas64_2e452m3/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e452m3/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // takes the 8 input limbs as separate arguments and writes the result limbs through out (fesquare.c notes "caller: uint64_t out[8]")
diff --git a/src/Specific/solinas64_2e452m3/freeze.c b/src/Specific/solinas64_2e452m3/freeze.c
new file mode 100644
index 000000000..3901344f6
--- /dev/null
+++ b/src/Specific/solinas64_2e452m3/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening '{' follows this signature and the lines below are not valid C; this looks like unfinished code-generator output, so this file cannot compile as-is
+out[0] = uint64_t x16; // NOTE(review): a declaration on the right-hand side of an assignment; x16 is never given a value
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 57 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C; presumably a subtract-with-borrow operation the generator failed to print - confirm upstream
+out[2] = x2;
+out[3] = 0x1fffffffffffffd;; // equals 2^57 - 3; the second ';' is a stray empty statement
+}
+// caller: uint64_t out[4]; NOTE(review): femul.c and fesquare.c in this directory state out[8] - confirm the intended size
diff --git a/src/Specific/solinas64_2e452m3/freeze.h b/src/Specific/solinas64_2e452m3/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e452m3/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // NOTE(review): the freeze.c added alongside this header does not contain valid C, so this declaration currently has no usable definition
diff --git a/src/Specific/solinas64_2e468m17/femul.c b/src/Specific/solinas64_2e468m17/femul.c
new file mode 100644
index 000000000..ed57de1b6
--- /dev/null
+++ b/src/Specific/solinas64_2e468m17/femul.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Multiplies two 8-limb operands and writes 8 limbs to out. First operand, lowest limb to highest: x5, x7, x9, x11, x13, x15, x17, x16; second operand: x19, x21, x23, x25, x27, x29, x31, x30 (so each operand is passed highest limb first).
+{ uint128_t x32 = (((uint128_t)x5 * x30) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + (((uint128_t)x17 * x21) + ((uint128_t)x16 * x19)))))))); // coefficient 7 (highest): all limb products whose indices sum to 7
+{ uint128_t x33 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + ((0x2 * ((uint128_t)x11 * x25)) + (((uint128_t)x13 * x23) + ((0x2 * ((uint128_t)x15 * x21)) + ((uint128_t)x17 * x19))))))) + (0x11 * (0x2 * ((uint128_t)x16 * x30)))); // coefficient 6; in the even coefficients every product of two odd-numbered limbs is doubled, and products whose indices sum past 7 are folded back in multiplied by 0x11
+{ uint128_t x34 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + (((uint128_t)x13 * x21) + ((uint128_t)x15 * x19)))))) + (0x11 * (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31)))); // coefficient 5
+{ uint128_t x35 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + (((uint128_t)x9 * x23) + ((0x2 * ((uint128_t)x11 * x21)) + ((uint128_t)x13 * x19))))) + (0x11 * ((0x2 * ((uint128_t)x15 * x30)) + (((uint128_t)x17 * x31) + (0x2 * ((uint128_t)x16 * x29)))))); // coefficient 4
+{ uint128_t x36 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (0x11 * (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))))); // coefficient 3
+{ uint128_t x37 = ((((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x21)) + ((uint128_t)x9 * x19))) + (0x11 * ((0x2 * ((uint128_t)x11 * x30)) + (((uint128_t)x13 * x31) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + (0x2 * ((uint128_t)x16 * x25)))))))); // coefficient 2
+{ uint128_t x38 = ((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (0x11 * (((uint128_t)x9 * x30) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((uint128_t)x16 * x23)))))))); // coefficient 1
+{ uint128_t x39 = (((uint128_t)x5 * x19) + (0x11 * ((0x2 * ((uint128_t)x7 * x30)) + (((uint128_t)x9 * x31) + ((0x2 * ((uint128_t)x11 * x29)) + (((uint128_t)x13 * x27) + ((0x2 * ((uint128_t)x15 * x25)) + (((uint128_t)x17 * x23) + (0x2 * ((uint128_t)x16 * x21)))))))))); // coefficient 0 (lowest)
+{ uint128_t x40 = (x39 >> 0x3b); // carry chain starts at limb 0: bits above the low 59 move up
+{ uint64_t x41 = ((uint64_t)x39 & 0x7ffffffffffffff); // limb 0 keeps 59 bits
+{ uint128_t x42 = (x40 + x38);
+{ uint128_t x43 = (x42 >> 0x3a); // limb 1 keeps 58 bits; kept widths alternate 59/58 all the way up (4*59 + 4*58 = 468)
+{ uint64_t x44 = ((uint64_t)x42 & 0x3ffffffffffffff);
+{ uint128_t x45 = (x43 + x37);
+{ uint128_t x46 = (x45 >> 0x3b);
+{ uint64_t x47 = ((uint64_t)x45 & 0x7ffffffffffffff);
+{ uint128_t x48 = (x46 + x36);
+{ uint128_t x49 = (x48 >> 0x3a);
+{ uint64_t x50 = ((uint64_t)x48 & 0x3ffffffffffffff);
+{ uint128_t x51 = (x49 + x35);
+{ uint128_t x52 = (x51 >> 0x3b);
+{ uint64_t x53 = ((uint64_t)x51 & 0x7ffffffffffffff);
+{ uint128_t x54 = (x52 + x34);
+{ uint128_t x55 = (x54 >> 0x3a);
+{ uint64_t x56 = ((uint64_t)x54 & 0x3ffffffffffffff);
+{ uint128_t x57 = (x55 + x33);
+{ uint128_t x58 = (x57 >> 0x3b);
+{ uint64_t x59 = ((uint64_t)x57 & 0x7ffffffffffffff);
+{ uint128_t x60 = (x58 + x32);
+{ uint128_t x61 = (x60 >> 0x3a); // carry out of the highest limb
+{ uint64_t x62 = ((uint64_t)x60 & 0x3ffffffffffffff);
+{ uint128_t x63 = (x41 + (0x11 * x61)); // that carry re-enters limb 0 multiplied by 0x11 (17, matching the m17 of the directory name; presumably the modulus is 2^468 - 17 - confirm)
+{ uint64_t x64 = (uint64_t) (x63 >> 0x3b); // second, short carry pass: limb 0 -> limb 1 -> limb 2
+{ uint64_t x65 = ((uint64_t)x63 & 0x7ffffffffffffff);
+{ uint64_t x66 = (x64 + x44);
+{ uint64_t x67 = (x66 >> 0x3a);
+{ uint64_t x68 = (x66 & 0x3ffffffffffffff);
+out[0] = x62; // out[0] is the highest limb (7); out[7] is the lowest (0)
+out[1] = x59;
+out[2] = x56;
+out[3] = x53;
+out[4] = x50;
+out[5] = x67 + x47; // limb 2 absorbs the last carry x67 and is not masked again
+out[6] = x68;
+out[7] = x65;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e468m17/femul.h b/src/Specific/solinas64_2e468m17/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e468m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // prototype only: one output pointer followed by sixteen 64-bit words passed by value; force_inline is the always_inline attribute defined just above
diff --git a/src/Specific/solinas64_2e468m17/fesquare.c b/src/Specific/solinas64_2e468m17/fesquare.c
new file mode 100644
index 000000000..8394de5b0
--- /dev/null
+++ b/src/Specific/solinas64_2e468m17/fesquare.c
@@ -0,0 +1,66 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 8-limb operand and writes 8 limbs to out. Limbs from lowest to highest: x2, x4, x6, x8, x10, x12, x14, x13 (so the operand is passed highest limb first).
+{ uint128_t x15 = (((uint128_t)x2 * x13) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x13 * x2)))))))); // coefficient 7 (highest): all limb products whose indices sum to 7; symmetric products are written out twice rather than doubled
+{ uint128_t x16 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + (((uint128_t)x6 * x10) + ((0x2 * ((uint128_t)x8 * x8)) + (((uint128_t)x10 * x6) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (0x2 * ((uint128_t)x13 * x13)))); // coefficient 6; in the even coefficients every product of two odd-numbered limbs is doubled, and products whose indices sum past 7 are folded back in multiplied by 0x11
+{ uint128_t x17 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14)))); // coefficient 5
+{ uint128_t x18 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + (((uint128_t)x6 * x6) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * ((0x2 * ((uint128_t)x12 * x13)) + (((uint128_t)x14 * x14) + (0x2 * ((uint128_t)x13 * x12)))))); // coefficient 4
+{ uint128_t x19 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))))); // coefficient 3
+{ uint128_t x20 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * ((0x2 * ((uint128_t)x8 * x13)) + (((uint128_t)x10 * x14) + ((0x2 * ((uint128_t)x12 * x12)) + (((uint128_t)x14 * x10) + (0x2 * ((uint128_t)x13 * x8)))))))); // coefficient 2
+{ uint128_t x21 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x13) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + ((uint128_t)x13 * x6)))))))); // coefficient 1
+{ uint128_t x22 = (((uint128_t)x2 * x2) + (0x11 * ((0x2 * ((uint128_t)x4 * x13)) + (((uint128_t)x6 * x14) + ((0x2 * ((uint128_t)x8 * x12)) + (((uint128_t)x10 * x10) + ((0x2 * ((uint128_t)x12 * x8)) + (((uint128_t)x14 * x6) + (0x2 * ((uint128_t)x13 * x4)))))))))); // coefficient 0 (lowest)
+{ uint128_t x23 = (x22 >> 0x3b); // carry chain starts at limb 0: bits above the low 59 move up
+{ uint64_t x24 = ((uint64_t)x22 & 0x7ffffffffffffff); // limb 0 keeps 59 bits
+{ uint128_t x25 = (x23 + x21);
+{ uint128_t x26 = (x25 >> 0x3a); // limb 1 keeps 58 bits; kept widths alternate 59/58 all the way up (4*59 + 4*58 = 468)
+{ uint64_t x27 = ((uint64_t)x25 & 0x3ffffffffffffff);
+{ uint128_t x28 = (x26 + x20);
+{ uint128_t x29 = (x28 >> 0x3b);
+{ uint64_t x30 = ((uint64_t)x28 & 0x7ffffffffffffff);
+{ uint128_t x31 = (x29 + x19);
+{ uint128_t x32 = (x31 >> 0x3a);
+{ uint64_t x33 = ((uint64_t)x31 & 0x3ffffffffffffff);
+{ uint128_t x34 = (x32 + x18);
+{ uint128_t x35 = (x34 >> 0x3b);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffffff);
+{ uint128_t x37 = (x35 + x17);
+{ uint128_t x38 = (x37 >> 0x3a);
+{ uint64_t x39 = ((uint64_t)x37 & 0x3ffffffffffffff);
+{ uint128_t x40 = (x38 + x16);
+{ uint128_t x41 = (x40 >> 0x3b);
+{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffffff);
+{ uint128_t x43 = (x41 + x15);
+{ uint128_t x44 = (x43 >> 0x3a); // carry out of the highest limb
+{ uint64_t x45 = ((uint64_t)x43 & 0x3ffffffffffffff);
+{ uint128_t x46 = (x24 + (0x11 * x44)); // that carry re-enters limb 0 multiplied by 0x11 (17, matching the m17 of the directory name; presumably the modulus is 2^468 - 17 - confirm)
+{ uint64_t x47 = (uint64_t) (x46 >> 0x3b); // second, short carry pass: limb 0 -> limb 1 -> limb 2
+{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffffff);
+{ uint64_t x49 = (x47 + x27);
+{ uint64_t x50 = (x49 >> 0x3a);
+{ uint64_t x51 = (x49 & 0x3ffffffffffffff);
+out[0] = x45; // out[0] is the highest limb (7); out[7] is the lowest (0)
+out[1] = x42;
+out[2] = x39;
+out[3] = x36;
+out[4] = x33;
+out[5] = x50 + x30; // limb 2 absorbs the last carry x50 and is not masked again
+out[6] = x51;
+out[7] = x48;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e468m17/fesquare.h b/src/Specific/solinas64_2e468m17/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e468m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only: one output pointer followed by eight 64-bit words passed by value; force_inline is the always_inline attribute defined just above
diff --git a/src/Specific/solinas64_2e468m17/freeze.c b/src/Specific/solinas64_2e468m17/freeze.c
new file mode 100644
index 000000000..8310bd145
--- /dev/null
+++ b/src/Specific/solinas64_2e468m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening brace follows this signature, so the lines below do not form a valid C function body
+out[0] = uint64_t x16; // NOTE(review): a declaration appears where an expression is required; this cannot compile
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 59 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): this is not C; it looks like an operation the code printer did not translate - confirm against the generator
+out[2] = x2; // only x2 of the eight value parameters is referenced in this body
+out[3] = 0x7ffffffffffffef;; // equals 2^59 - 17; the doubled semicolon is in the original
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e468m17/freeze.h b/src/Specific/solinas64_2e468m17/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e468m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only: one output pointer followed by eight 64-bit words passed by value; force_inline is the always_inline attribute defined just above
diff --git a/src/Specific/solinas64_2e480m2e240m1/femul.c b/src/Specific/solinas64_2e480m2e240m1/femul.c
new file mode 100644
index 000000000..af01cc5bd
--- /dev/null
+++ b/src/Specific/solinas64_2e480m2e240m1/femul.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19) // Multiplies two 8-limb operands and writes 8 limbs to out. First operand, lowest limb to highest: x5, x7, x9, x11 (low half), x13, x15, x17, x16 (high half); second operand: x19, x21, x23, x25 (low half), x27, x29, x31, x30 (high half).
+{ uint128_t x32 = (((uint128_t)(x11 + x16) * (x25 + x30)) - ((uint128_t)x11 * x25)); // x32..x38: coefficients, degree 6 down to degree 0, of (low1 + high1) * (low2 + high2) - low1 * low2, taken over the four-limb halves of each operand
+{ uint128_t x33 = ((((uint128_t)(x9 + x17) * (x25 + x30)) + ((uint128_t)(x11 + x16) * (x23 + x31))) - (((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)));
+{ uint128_t x34 = ((((uint128_t)(x7 + x15) * (x25 + x30)) + (((uint128_t)(x9 + x17) * (x23 + x31)) + ((uint128_t)(x11 + x16) * (x21 + x29)))) - (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))));
+{ uint128_t x35 = ((((uint128_t)(x5 + x13) * (x25 + x30)) + (((uint128_t)(x7 + x15) * (x23 + x31)) + (((uint128_t)(x9 + x17) * (x21 + x29)) + ((uint128_t)(x11 + x16) * (x19 + x27))))) - (((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19))))); // degree 3; used directly as the pre-carry value of limb 7
+{ uint128_t x36 = ((((uint128_t)(x5 + x13) * (x23 + x31)) + (((uint128_t)(x7 + x15) * (x21 + x29)) + ((uint128_t)(x9 + x17) * (x19 + x27)))) - (((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))));
+{ uint128_t x37 = ((((uint128_t)(x5 + x13) * (x21 + x29)) + ((uint128_t)(x7 + x15) * (x19 + x27))) - (((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)));
+{ uint128_t x38 = (((uint128_t)(x5 + x13) * (x19 + x27)) - ((uint128_t)x5 * x19));
+{ uint128_t x39 = (((((uint128_t)x11 * x25) + ((uint128_t)x16 * x30)) + x36) + x32); // x39..x45: pre-carry values of limb 6 (x39) down to limb 0 (x45)
+{ uint128_t x40 = ((((((uint128_t)x9 * x25) + ((uint128_t)x11 * x23)) + (((uint128_t)x17 * x30) + ((uint128_t)x16 * x31))) + x37) + x33);
+{ uint128_t x41 = ((((((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21))) + (((uint128_t)x15 * x30) + (((uint128_t)x17 * x31) + ((uint128_t)x16 * x29)))) + x38) + x34);
+{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + (((uint128_t)x9 * x21) + ((uint128_t)x11 * x19)))) + (((uint128_t)x13 * x30) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + ((uint128_t)x16 * x27)))));
+{ uint128_t x43 = (((((uint128_t)x5 * x23) + (((uint128_t)x7 * x21) + ((uint128_t)x9 * x19))) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((uint128_t)x17 * x27)))) + x32);
+{ uint128_t x44 = (((((uint128_t)x5 * x21) + ((uint128_t)x7 * x19)) + (((uint128_t)x13 * x29) + ((uint128_t)x15 * x27))) + x33);
+{ uint128_t x45 = ((((uint128_t)x5 * x19) + ((uint128_t)x13 * x27)) + x34);
+{ uint128_t x46 = (x42 >> 0x3c); // every limb keeps 60 bits; two carry chains follow, one over limbs 3->4->5->6->7 and one over limbs 0->1->2->3
+{ uint64_t x47 = ((uint64_t)x42 & 0xfffffffffffffff);
+{ uint128_t x48 = (x35 >> 0x3c);
+{ uint64_t x49 = ((uint64_t)x35 & 0xfffffffffffffff);
+{ uint128_t x50 = ((0x1000000000000000 * x48) + x49); // reassembles x35 from the two pieces just split off, so x51 equals x48 and x52 equals x49
+{ uint128_t x51 = (x50 >> 0x3c); // carry out of limb 7
+{ uint64_t x52 = ((uint64_t)x50 & 0xfffffffffffffff);
+{ uint128_t x53 = ((x46 + x41) + x51); // limb 4 receives the carry from limb 3 and the carry out of limb 7
+{ uint128_t x54 = (x53 >> 0x3c);
+{ uint64_t x55 = ((uint64_t)x53 & 0xfffffffffffffff);
+{ uint128_t x56 = (x45 + x51); // limb 0 receives the carry out of limb 7 as well (consistent with 2^480 = 2^240 + 1 modulo the prime the directory name suggests - confirm)
+{ uint128_t x57 = (x56 >> 0x3c);
+{ uint64_t x58 = ((uint64_t)x56 & 0xfffffffffffffff);
+{ uint128_t x59 = (x54 + x40);
+{ uint128_t x60 = (x59 >> 0x3c);
+{ uint64_t x61 = ((uint64_t)x59 & 0xfffffffffffffff);
+{ uint128_t x62 = (x57 + x44);
+{ uint128_t x63 = (x62 >> 0x3c);
+{ uint64_t x64 = ((uint64_t)x62 & 0xfffffffffffffff);
+{ uint128_t x65 = (x60 + x39);
+{ uint128_t x66 = (x65 >> 0x3c);
+{ uint64_t x67 = ((uint64_t)x65 & 0xfffffffffffffff);
+{ uint128_t x68 = (x63 + x43);
+{ uint128_t x69 = (x68 >> 0x3c);
+{ uint64_t x70 = ((uint64_t)x68 & 0xfffffffffffffff);
+{ uint128_t x71 = (x66 + x52);
+{ uint64_t x72 = (uint64_t) (x71 >> 0x3c);
+{ uint64_t x73 = ((uint64_t)x71 & 0xfffffffffffffff);
+{ uint128_t x74 = (x69 + x47); // the low chain reaches limb 3 here
+{ uint64_t x75 = (uint64_t) (x74 >> 0x3c);
+{ uint64_t x76 = ((uint64_t)x74 & 0xfffffffffffffff);
+{ uint128_t x77 = (((uint128_t)0x1000000000000000 * x72) + x73); // reassembles x71, so x78 equals x72 (second carry out of limb 7) and x79 equals x73
+{ uint64_t x78 = (uint64_t) (x77 >> 0x3c);
+{ uint64_t x79 = ((uint64_t)x77 & 0xfffffffffffffff);
+{ uint64_t x80 = ((x75 + x55) + x78); // second carry out of limb 7 is again added to limb 4 ...
+{ uint64_t x81 = (x80 >> 0x3c);
+{ uint64_t x82 = (x80 & 0xfffffffffffffff);
+{ uint64_t x83 = (x58 + x78); // ... and to limb 0
+{ uint64_t x84 = (x83 >> 0x3c);
+{ uint64_t x85 = (x83 & 0xfffffffffffffff);
+out[0] = x79; // out[0] is the highest limb (7); out[7] is the lowest (0)
+out[1] = x67;
+out[2] = x81 + x61; // limb 5 absorbs the leftover carry x81 and is not masked again
+out[3] = x82;
+out[4] = x76;
+out[5] = x70;
+out[6] = x84 + x64; // limb 1 absorbs the leftover carry x84 and is not masked again
+out[7] = x85;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e480m2e240m1/femul.h b/src/Specific/solinas64_2e480m2e240m1/femul.h
new file mode 100644
index 000000000..621df99c5
--- /dev/null
+++ b/src/Specific/solinas64_2e480m2e240m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x16, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x30, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19); // prototype only: one output pointer followed by sixteen 64-bit words passed by value; force_inline is the always_inline attribute defined just above
diff --git a/src/Specific/solinas64_2e480m2e240m1/fesquare.c b/src/Specific/solinas64_2e480m2e240m1/fesquare.c
new file mode 100644
index 000000000..feaea291a
--- /dev/null
+++ b/src/Specific/solinas64_2e480m2e240m1/fesquare.c
@@ -0,0 +1,83 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 8-limb operand and writes 8 limbs to out. Limbs from lowest to highest: x2, x4, x6, x8 (low half), x10, x12, x14, x13 (high half).
+{ uint128_t x15 = (((uint128_t)(x8 + x13) * (x8 + x13)) - ((uint128_t)x8 * x8)); // x15..x21: coefficients, degree 6 down to degree 0, of (low + high)^2 - low^2, taken over the four-limb halves of the operand
+{ uint128_t x16 = ((((uint128_t)(x6 + x14) * (x8 + x13)) + ((uint128_t)(x8 + x13) * (x6 + x14))) - (((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)));
+{ uint128_t x17 = ((((uint128_t)(x4 + x12) * (x8 + x13)) + (((uint128_t)(x6 + x14) * (x6 + x14)) + ((uint128_t)(x8 + x13) * (x4 + x12)))) - (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))));
+{ uint128_t x18 = ((((uint128_t)(x2 + x10) * (x8 + x13)) + (((uint128_t)(x4 + x12) * (x6 + x14)) + (((uint128_t)(x6 + x14) * (x4 + x12)) + ((uint128_t)(x8 + x13) * (x2 + x10))))) - (((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2))))); // degree 3; used directly as the pre-carry value of limb 7
+{ uint128_t x19 = ((((uint128_t)(x2 + x10) * (x6 + x14)) + (((uint128_t)(x4 + x12) * (x4 + x12)) + ((uint128_t)(x6 + x14) * (x2 + x10)))) - (((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))));
+{ uint128_t x20 = ((((uint128_t)(x2 + x10) * (x4 + x12)) + ((uint128_t)(x4 + x12) * (x2 + x10))) - (((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)));
+{ uint128_t x21 = (((uint128_t)(x2 + x10) * (x2 + x10)) - ((uint128_t)x2 * x2));
+{ uint128_t x22 = (((((uint128_t)x8 * x8) + ((uint128_t)x13 * x13)) + x19) + x15); // x22..x28: pre-carry values of limb 6 (x22) down to limb 0 (x28)
+{ uint128_t x23 = ((((((uint128_t)x6 * x8) + ((uint128_t)x8 * x6)) + (((uint128_t)x14 * x13) + ((uint128_t)x13 * x14))) + x20) + x16);
+{ uint128_t x24 = ((((((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + ((uint128_t)x8 * x4))) + (((uint128_t)x12 * x13) + (((uint128_t)x14 * x14) + ((uint128_t)x13 * x12)))) + x21) + x17);
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x13) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((uint128_t)x13 * x10)))));
+{ uint128_t x26 = (((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + ((uint128_t)x14 * x10)))) + x15);
+{ uint128_t x27 = (((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x10 * x12) + ((uint128_t)x12 * x10))) + x16);
+{ uint128_t x28 = ((((uint128_t)x2 * x2) + ((uint128_t)x10 * x10)) + x17);
+{ uint128_t x29 = (x25 >> 0x3c); // every limb keeps 60 bits; two carry chains follow, one over limbs 3->4->5->6->7 and one over limbs 0->1->2->3
+{ uint64_t x30 = ((uint64_t)x25 & 0xfffffffffffffff);
+{ uint128_t x31 = (x18 >> 0x3c);
+{ uint64_t x32 = ((uint64_t)x18 & 0xfffffffffffffff);
+{ uint128_t x33 = ((0x1000000000000000 * x31) + x32); // reassembles x18 from the two pieces just split off, so x34 equals x31 and x35 equals x32
+{ uint128_t x34 = (x33 >> 0x3c); // carry out of limb 7
+{ uint64_t x35 = ((uint64_t)x33 & 0xfffffffffffffff);
+{ uint128_t x36 = ((x29 + x24) + x34); // limb 4 receives the carry from limb 3 and the carry out of limb 7
+{ uint128_t x37 = (x36 >> 0x3c);
+{ uint64_t x38 = ((uint64_t)x36 & 0xfffffffffffffff);
+{ uint128_t x39 = (x28 + x34); // limb 0 receives the carry out of limb 7 as well (consistent with 2^480 = 2^240 + 1 modulo the prime the directory name suggests - confirm)
+{ uint128_t x40 = (x39 >> 0x3c);
+{ uint64_t x41 = ((uint64_t)x39 & 0xfffffffffffffff);
+{ uint128_t x42 = (x37 + x23);
+{ uint128_t x43 = (x42 >> 0x3c);
+{ uint64_t x44 = ((uint64_t)x42 & 0xfffffffffffffff);
+{ uint128_t x45 = (x40 + x27);
+{ uint128_t x46 = (x45 >> 0x3c);
+{ uint64_t x47 = ((uint64_t)x45 & 0xfffffffffffffff);
+{ uint128_t x48 = (x43 + x22);
+{ uint128_t x49 = (x48 >> 0x3c);
+{ uint64_t x50 = ((uint64_t)x48 & 0xfffffffffffffff);
+{ uint128_t x51 = (x46 + x26);
+{ uint128_t x52 = (x51 >> 0x3c);
+{ uint64_t x53 = ((uint64_t)x51 & 0xfffffffffffffff);
+{ uint128_t x54 = (x49 + x35);
+{ uint64_t x55 = (uint64_t) (x54 >> 0x3c);
+{ uint64_t x56 = ((uint64_t)x54 & 0xfffffffffffffff);
+{ uint128_t x57 = (x52 + x30); // the low chain reaches limb 3 here
+{ uint64_t x58 = (uint64_t) (x57 >> 0x3c);
+{ uint64_t x59 = ((uint64_t)x57 & 0xfffffffffffffff);
+{ uint128_t x60 = (((uint128_t)0x1000000000000000 * x55) + x56); // reassembles x54, so x61 equals x55 (second carry out of limb 7) and x62 equals x56
+{ uint64_t x61 = (uint64_t) (x60 >> 0x3c);
+{ uint64_t x62 = ((uint64_t)x60 & 0xfffffffffffffff);
+{ uint64_t x63 = ((x58 + x38) + x61); // second carry out of limb 7 is again added to limb 4 ...
+{ uint64_t x64 = (x63 >> 0x3c);
+{ uint64_t x65 = (x63 & 0xfffffffffffffff);
+{ uint64_t x66 = (x41 + x61); // ... and to limb 0
+{ uint64_t x67 = (x66 >> 0x3c);
+{ uint64_t x68 = (x66 & 0xfffffffffffffff);
+out[0] = x62; // out[0] is the highest limb (7); out[7] is the lowest (0)
+out[1] = x50;
+out[2] = x64 + x44; // limb 5 absorbs the leftover carry x64 and is not masked again
+out[3] = x65;
+out[4] = x59;
+out[5] = x53;
+out[6] = x67 + x47; // limb 1 absorbs the leftover carry x67 and is not masked again
+out[7] = x68;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[8];
diff --git a/src/Specific/solinas64_2e480m2e240m1/fesquare.h b/src/Specific/solinas64_2e480m2e240m1/fesquare.h
new file mode 100644
index 000000000..14572f648
--- /dev/null
+++ b/src/Specific/solinas64_2e480m2e240m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only: one output pointer followed by eight 64-bit words passed by value; force_inline is the always_inline attribute defined just above
diff --git a/src/Specific/solinas64_2e480m2e240m1/freeze.c b/src/Specific/solinas64_2e480m2e240m1/freeze.c
new file mode 100644
index 000000000..9d5366fac
--- /dev/null
+++ b/src/Specific/solinas64_2e480m2e240m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): no opening brace follows this signature, so the lines below do not form a valid C function body
+out[0] = uint64_t x16; // NOTE(review): a declaration appears where an expression is required; this cannot compile
+out[1] = uint8_t x17 = Op Syntax.SubWithGetBorrow 60 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): this is not C; it looks like an operation the code printer did not translate - confirm against the generator
+out[2] = x2; // only x2 of the eight value parameters is referenced in this body
+out[3] = 0xfffffffffffffff;; // equals 2^60 - 1 (60 one-bits); the doubled semicolon is in the original
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e480m2e240m1/freeze.h b/src/Specific/solinas64_2e480m2e240m1/freeze.h
new file mode 100644
index 000000000..edb9ad6a8
--- /dev/null
+++ b/src/Specific/solinas64_2e480m2e240m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x13, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // prototype only: one output pointer followed by eight 64-bit words passed by value; force_inline is the always_inline attribute defined just above
diff --git a/src/Specific/solinas64_2e488m17/femul.c b/src/Specific/solinas64_2e488m17/femul.c
new file mode 100644
index 000000000..155b528f3
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/femul.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35) // Takes two 16-limb operands limb by limb (x32,x33,x31..x5 and x62,x63,x61..x35), forms sixteen 128-bit sums of limb products, carries them, and writes 16 limbs to out[0..15].
+{ uint128_t x64 = (((uint128_t)x5 * x62) + (((uint128_t)x7 * x63) + (((uint128_t)x9 * x61) + (((uint128_t)x11 * x59) + (((uint128_t)x13 * x57) + (((uint128_t)x15 * x55) + (((uint128_t)x17 * x53) + (((uint128_t)x19 * x51) + (((uint128_t)x21 * x49) + (((uint128_t)x23 * x47) + (((uint128_t)x25 * x45) + (((uint128_t)x27 * x43) + (((uint128_t)x29 * x41) + (((uint128_t)x31 * x39) + (((uint128_t)x33 * x37) + ((uint128_t)x32 * x35)))))))))))))))); // x64..x79 are the sixteen column sums; x64 is the only one with no 0x2 or 0x11 factors.
+{ uint128_t x65 = ((((uint128_t)x5 * x63) + (((uint128_t)0x2 * (x7 * x61)) + (((uint128_t)x9 * x59) + (((uint128_t)0x2 * (x11 * x57)) + (((uint128_t)x13 * x55) + (((uint128_t)0x2 * (x15 * x53)) + (((uint128_t)x17 * x51) + (((uint128_t)0x2 * (x19 * x49)) + (((uint128_t)x21 * x47) + (((uint128_t)0x2 * (x23 * x45)) + (((uint128_t)x25 * x43) + (((uint128_t)0x2 * (x27 * x41)) + (((uint128_t)x29 * x39) + (((uint128_t)0x2 * (x31 * x37)) + ((uint128_t)x33 * x35))))))))))))))) + (0x11 * ((uint128_t)0x2 * (x32 * x62)))); // Doubled terms multiply in 64 bits before widening (the cast applies to 0x2 only); the second group is scaled by 0x11 (17) - presumably the wrap-around for a modulus of 2^488 - 17 as the directory name suggests, TODO confirm.
+{ uint128_t x66 = ((((uint128_t)x5 * x61) + (((uint128_t)x7 * x59) + (((uint128_t)x9 * x57) + (((uint128_t)x11 * x55) + (((uint128_t)x13 * x53) + (((uint128_t)x15 * x51) + (((uint128_t)x17 * x49) + (((uint128_t)x19 * x47) + (((uint128_t)x21 * x45) + (((uint128_t)x23 * x43) + (((uint128_t)x25 * x41) + (((uint128_t)x27 * x39) + (((uint128_t)x29 * x37) + ((uint128_t)x31 * x35)))))))))))))) + (0x11 * (((uint128_t)x33 * x62) + ((uint128_t)x32 * x63))));
+{ uint128_t x67 = ((((uint128_t)x5 * x59) + (((uint128_t)0x2 * (x7 * x57)) + (((uint128_t)x9 * x55) + (((uint128_t)0x2 * (x11 * x53)) + (((uint128_t)x13 * x51) + (((uint128_t)0x2 * (x15 * x49)) + (((uint128_t)x17 * x47) + (((uint128_t)0x2 * (x19 * x45)) + (((uint128_t)x21 * x43) + (((uint128_t)0x2 * (x23 * x41)) + (((uint128_t)x25 * x39) + (((uint128_t)0x2 * (x27 * x37)) + ((uint128_t)x29 * x35))))))))))))) + (0x11 * (((uint128_t)0x2 * (x31 * x62)) + (((uint128_t)x33 * x63) + ((uint128_t)0x2 * (x32 * x61))))));
+{ uint128_t x68 = ((((uint128_t)x5 * x57) + (((uint128_t)x7 * x55) + (((uint128_t)x9 * x53) + (((uint128_t)x11 * x51) + (((uint128_t)x13 * x49) + (((uint128_t)x15 * x47) + (((uint128_t)x17 * x45) + (((uint128_t)x19 * x43) + (((uint128_t)x21 * x41) + (((uint128_t)x23 * x39) + (((uint128_t)x25 * x37) + ((uint128_t)x27 * x35)))))))))))) + (0x11 * (((uint128_t)x29 * x62) + (((uint128_t)x31 * x63) + (((uint128_t)x33 * x61) + ((uint128_t)x32 * x59))))));
+{ uint128_t x69 = ((((uint128_t)x5 * x55) + (((uint128_t)0x2 * (x7 * x53)) + (((uint128_t)x9 * x51) + (((uint128_t)0x2 * (x11 * x49)) + (((uint128_t)x13 * x47) + (((uint128_t)0x2 * (x15 * x45)) + (((uint128_t)x17 * x43) + (((uint128_t)0x2 * (x19 * x41)) + (((uint128_t)x21 * x39) + (((uint128_t)0x2 * (x23 * x37)) + ((uint128_t)x25 * x35))))))))))) + (0x11 * (((uint128_t)0x2 * (x27 * x62)) + (((uint128_t)x29 * x63) + (((uint128_t)0x2 * (x31 * x61)) + (((uint128_t)x33 * x59) + ((uint128_t)0x2 * (x32 * x57))))))));
+{ uint128_t x70 = ((((uint128_t)x5 * x53) + (((uint128_t)x7 * x51) + (((uint128_t)x9 * x49) + (((uint128_t)x11 * x47) + (((uint128_t)x13 * x45) + (((uint128_t)x15 * x43) + (((uint128_t)x17 * x41) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x23 * x35)))))))))) + (0x11 * (((uint128_t)x25 * x62) + (((uint128_t)x27 * x63) + (((uint128_t)x29 * x61) + (((uint128_t)x31 * x59) + (((uint128_t)x33 * x57) + ((uint128_t)x32 * x55))))))));
+{ uint128_t x71 = ((((uint128_t)x5 * x51) + (((uint128_t)0x2 * (x7 * x49)) + (((uint128_t)x9 * x47) + (((uint128_t)0x2 * (x11 * x45)) + (((uint128_t)x13 * x43) + (((uint128_t)0x2 * (x15 * x41)) + (((uint128_t)x17 * x39) + (((uint128_t)0x2 * (x19 * x37)) + ((uint128_t)x21 * x35))))))))) + (0x11 * (((uint128_t)0x2 * (x23 * x62)) + (((uint128_t)x25 * x63) + (((uint128_t)0x2 * (x27 * x61)) + (((uint128_t)x29 * x59) + (((uint128_t)0x2 * (x31 * x57)) + (((uint128_t)x33 * x55) + ((uint128_t)0x2 * (x32 * x53))))))))));
+{ uint128_t x72 = ((((uint128_t)x5 * x49) + (((uint128_t)x7 * x47) + (((uint128_t)x9 * x45) + (((uint128_t)x11 * x43) + (((uint128_t)x13 * x41) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + ((uint128_t)x19 * x35)))))))) + (0x11 * (((uint128_t)x21 * x62) + (((uint128_t)x23 * x63) + (((uint128_t)x25 * x61) + (((uint128_t)x27 * x59) + (((uint128_t)x29 * x57) + (((uint128_t)x31 * x55) + (((uint128_t)x33 * x53) + ((uint128_t)x32 * x51))))))))));
+{ uint128_t x73 = ((((uint128_t)x5 * x47) + (((uint128_t)0x2 * (x7 * x45)) + (((uint128_t)x9 * x43) + (((uint128_t)0x2 * (x11 * x41)) + (((uint128_t)x13 * x39) + (((uint128_t)0x2 * (x15 * x37)) + ((uint128_t)x17 * x35))))))) + (0x11 * (((uint128_t)0x2 * (x19 * x62)) + (((uint128_t)x21 * x63) + (((uint128_t)0x2 * (x23 * x61)) + (((uint128_t)x25 * x59) + (((uint128_t)0x2 * (x27 * x57)) + (((uint128_t)x29 * x55) + (((uint128_t)0x2 * (x31 * x53)) + (((uint128_t)x33 * x51) + ((uint128_t)0x2 * (x32 * x49))))))))))));
+{ uint128_t x74 = ((((uint128_t)x5 * x45) + (((uint128_t)x7 * x43) + (((uint128_t)x9 * x41) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + ((uint128_t)x15 * x35)))))) + (0x11 * (((uint128_t)x17 * x62) + (((uint128_t)x19 * x63) + (((uint128_t)x21 * x61) + (((uint128_t)x23 * x59) + (((uint128_t)x25 * x57) + (((uint128_t)x27 * x55) + (((uint128_t)x29 * x53) + (((uint128_t)x31 * x51) + (((uint128_t)x33 * x49) + ((uint128_t)x32 * x47))))))))))));
+{ uint128_t x75 = ((((uint128_t)x5 * x43) + (((uint128_t)0x2 * (x7 * x41)) + (((uint128_t)x9 * x39) + (((uint128_t)0x2 * (x11 * x37)) + ((uint128_t)x13 * x35))))) + (0x11 * (((uint128_t)0x2 * (x15 * x62)) + (((uint128_t)x17 * x63) + (((uint128_t)0x2 * (x19 * x61)) + (((uint128_t)x21 * x59) + (((uint128_t)0x2 * (x23 * x57)) + (((uint128_t)x25 * x55) + (((uint128_t)0x2 * (x27 * x53)) + (((uint128_t)x29 * x51) + (((uint128_t)0x2 * (x31 * x49)) + (((uint128_t)x33 * x47) + ((uint128_t)0x2 * (x32 * x45))))))))))))));
+{ uint128_t x76 = ((((uint128_t)x5 * x41) + (((uint128_t)x7 * x39) + (((uint128_t)x9 * x37) + ((uint128_t)x11 * x35)))) + (0x11 * (((uint128_t)x13 * x62) + (((uint128_t)x15 * x63) + (((uint128_t)x17 * x61) + (((uint128_t)x19 * x59) + (((uint128_t)x21 * x57) + (((uint128_t)x23 * x55) + (((uint128_t)x25 * x53) + (((uint128_t)x27 * x51) + (((uint128_t)x29 * x49) + (((uint128_t)x31 * x47) + (((uint128_t)x33 * x45) + ((uint128_t)x32 * x43))))))))))))));
+{ uint128_t x77 = ((((uint128_t)x5 * x39) + (((uint128_t)0x2 * (x7 * x37)) + ((uint128_t)x9 * x35))) + (0x11 * (((uint128_t)0x2 * (x11 * x62)) + (((uint128_t)x13 * x63) + (((uint128_t)0x2 * (x15 * x61)) + (((uint128_t)x17 * x59) + (((uint128_t)0x2 * (x19 * x57)) + (((uint128_t)x21 * x55) + (((uint128_t)0x2 * (x23 * x53)) + (((uint128_t)x25 * x51) + (((uint128_t)0x2 * (x27 * x49)) + (((uint128_t)x29 * x47) + (((uint128_t)0x2 * (x31 * x45)) + (((uint128_t)x33 * x43) + ((uint128_t)0x2 * (x32 * x41))))))))))))))));
+{ uint128_t x78 = ((((uint128_t)x5 * x37) + ((uint128_t)x7 * x35)) + (0x11 * (((uint128_t)x9 * x62) + (((uint128_t)x11 * x63) + (((uint128_t)x13 * x61) + (((uint128_t)x15 * x59) + (((uint128_t)x17 * x57) + (((uint128_t)x19 * x55) + (((uint128_t)x21 * x53) + (((uint128_t)x23 * x51) + (((uint128_t)x25 * x49) + (((uint128_t)x27 * x47) + (((uint128_t)x29 * x45) + (((uint128_t)x31 * x43) + (((uint128_t)x33 * x41) + ((uint128_t)x32 * x39))))))))))))))));
+{ uint128_t x79 = (((uint128_t)x5 * x35) + (0x11 * (((uint128_t)0x2 * (x7 * x62)) + (((uint128_t)x9 * x63) + (((uint128_t)0x2 * (x11 * x61)) + (((uint128_t)x13 * x59) + (((uint128_t)0x2 * (x15 * x57)) + (((uint128_t)x17 * x55) + (((uint128_t)0x2 * (x19 * x53)) + (((uint128_t)x21 * x51) + (((uint128_t)0x2 * (x23 * x49)) + (((uint128_t)x25 * x47) + (((uint128_t)0x2 * (x27 * x45)) + (((uint128_t)x29 * x43) + (((uint128_t)0x2 * (x31 * x41)) + (((uint128_t)x33 * x39) + ((uint128_t)0x2 * (x32 * x37)))))))))))))))))); // x79 is where the carry chain below starts; only its first product is not scaled by 0x11.
+{ uint64_t x80 = (uint64_t) (x79 >> 0x1f); // Carry chain: each step splits a column into a carry (bits above the limb) and a masked limb, then adds the carry to the next column; shift/mask widths alternate between 0x1f (31) and 0x1e (30) bits.
+{ uint64_t x81 = ((uint64_t)x79 & 0x7fffffff);
+{ uint128_t x82 = (x80 + x78);
+{ uint64_t x83 = (uint64_t) (x82 >> 0x1e);
+{ uint64_t x84 = ((uint64_t)x82 & 0x3fffffff);
+{ uint128_t x85 = (x83 + x77);
+{ uint64_t x86 = (uint64_t) (x85 >> 0x1f);
+{ uint64_t x87 = ((uint64_t)x85 & 0x7fffffff);
+{ uint128_t x88 = (x86 + x76);
+{ uint64_t x89 = (uint64_t) (x88 >> 0x1e);
+{ uint64_t x90 = ((uint64_t)x88 & 0x3fffffff);
+{ uint128_t x91 = (x89 + x75);
+{ uint64_t x92 = (uint64_t) (x91 >> 0x1f);
+{ uint64_t x93 = ((uint64_t)x91 & 0x7fffffff);
+{ uint128_t x94 = (x92 + x74);
+{ uint64_t x95 = (uint64_t) (x94 >> 0x1e);
+{ uint64_t x96 = ((uint64_t)x94 & 0x3fffffff);
+{ uint128_t x97 = (x95 + x73);
+{ uint64_t x98 = (uint64_t) (x97 >> 0x1f);
+{ uint64_t x99 = ((uint64_t)x97 & 0x7fffffff);
+{ uint128_t x100 = (x98 + x72);
+{ uint64_t x101 = (uint64_t) (x100 >> 0x1e);
+{ uint64_t x102 = ((uint64_t)x100 & 0x3fffffff);
+{ uint128_t x103 = (x101 + x71);
+{ uint64_t x104 = (uint64_t) (x103 >> 0x1f);
+{ uint64_t x105 = ((uint64_t)x103 & 0x7fffffff);
+{ uint128_t x106 = (x104 + x70);
+{ uint64_t x107 = (uint64_t) (x106 >> 0x1e);
+{ uint64_t x108 = ((uint64_t)x106 & 0x3fffffff);
+{ uint128_t x109 = (x107 + x69);
+{ uint64_t x110 = (uint64_t) (x109 >> 0x1f);
+{ uint64_t x111 = ((uint64_t)x109 & 0x7fffffff);
+{ uint128_t x112 = (x110 + x68);
+{ uint64_t x113 = (uint64_t) (x112 >> 0x1e);
+{ uint64_t x114 = ((uint64_t)x112 & 0x3fffffff);
+{ uint128_t x115 = (x113 + x67);
+{ uint64_t x116 = (uint64_t) (x115 >> 0x1f);
+{ uint64_t x117 = ((uint64_t)x115 & 0x7fffffff);
+{ uint128_t x118 = (x116 + x66);
+{ uint64_t x119 = (uint64_t) (x118 >> 0x1e);
+{ uint64_t x120 = ((uint64_t)x118 & 0x3fffffff);
+{ uint128_t x121 = (x119 + x65);
+{ uint64_t x122 = (uint64_t) (x121 >> 0x1f);
+{ uint64_t x123 = ((uint64_t)x121 & 0x7fffffff);
+{ uint128_t x124 = (x122 + x64);
+{ uint64_t x125 = (uint64_t) (x124 >> 0x1e);
+{ uint64_t x126 = ((uint64_t)x124 & 0x3fffffff);
+{ uint64_t x127 = (x81 + (0x11 * x125)); // The carry out of the last column (x125) is multiplied by 0x11 and folded back into the first limb x81, now in 64-bit arithmetic.
+{ uint64_t x128 = (x127 >> 0x1f); // Two further carry steps push the folded-in excess through x84 and towards x87.
+{ uint64_t x129 = (x127 & 0x7fffffff);
+{ uint64_t x130 = (x128 + x84);
+{ uint64_t x131 = (x130 >> 0x1e);
+{ uint64_t x132 = (x130 & 0x3fffffff);
+out[0] = x126; // Limbs are stored in the reverse of carry-chain order: out[0] comes from the last column (x64), out[15] from the first (x79).
+out[1] = x123;
+out[2] = x120;
+out[3] = x117;
+out[4] = x114;
+out[5] = x111;
+out[6] = x108;
+out[7] = x105;
+out[8] = x102;
+out[9] = x99;
+out[10] = x96;
+out[11] = x93;
+out[12] = x90;
+out[13] = x131 + x87; // The last carry x131 is added to x87 without a further mask, so this limb is not re-normalised here.
+out[14] = x132;
+out[15] = x129;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} // Closes the one nested scope opened by each of the 69 declarations above.
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas64_2e488m17/femul.h b/src/Specific/solinas64_2e488m17/femul.h
new file mode 100644
index 000000000..c4089fc7d
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x32, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x62, uint64_t x63, uint64_t x61, uint64_t x59, uint64_t x57, uint64_t x55, uint64_t x53, uint64_t x51, uint64_t x49, uint64_t x47, uint64_t x45, uint64_t x43, uint64_t x41, uint64_t x39, uint64_t x37, uint64_t x35); // Prototype only: 32 uint64_t inputs are passed by value (presumably two 16-limb operands, x32..x5 then x62..x35 - confirm against femul.c) and results go through out.
diff --git a/src/Specific/solinas64_2e488m17/femulDisplay.log b/src/Specific/solinas64_2e488m17/femulDisplay.log
new file mode 100644
index 000000000..1a52d2920
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/femulDisplay.log
@@ -0,0 +1,76 @@
+λ x x0 : word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x32, x33, x31, x29, x27, x25, x23, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x62, x63, x61, x59, x57, x55, x53, x51, x49, x47, x45, x43, x41, x39, x37, x35))%core,
+ uint128_t x64 = (((uint128_t)x5 * x62) + (((uint128_t)x7 * x63) + (((uint128_t)x9 * x61) + (((uint128_t)x11 * x59) + (((uint128_t)x13 * x57) + (((uint128_t)x15 * x55) + (((uint128_t)x17 * x53) + (((uint128_t)x19 * x51) + (((uint128_t)x21 * x49) + (((uint128_t)x23 * x47) + (((uint128_t)x25 * x45) + (((uint128_t)x27 * x43) + (((uint128_t)x29 * x41) + (((uint128_t)x31 * x39) + (((uint128_t)x33 * x37) + ((uint128_t)x32 * x35))))))))))))))));
+ uint128_t x65 = ((((uint128_t)x5 * x63) + (((uint128_t)0x2 * (x7 * x61)) + (((uint128_t)x9 * x59) + (((uint128_t)0x2 * (x11 * x57)) + (((uint128_t)x13 * x55) + (((uint128_t)0x2 * (x15 * x53)) + (((uint128_t)x17 * x51) + (((uint128_t)0x2 * (x19 * x49)) + (((uint128_t)x21 * x47) + (((uint128_t)0x2 * (x23 * x45)) + (((uint128_t)x25 * x43) + (((uint128_t)0x2 * (x27 * x41)) + (((uint128_t)x29 * x39) + (((uint128_t)0x2 * (x31 * x37)) + ((uint128_t)x33 * x35))))))))))))))) + (0x11 * ((uint128_t)0x2 * (x32 * x62))));
+ uint128_t x66 = ((((uint128_t)x5 * x61) + (((uint128_t)x7 * x59) + (((uint128_t)x9 * x57) + (((uint128_t)x11 * x55) + (((uint128_t)x13 * x53) + (((uint128_t)x15 * x51) + (((uint128_t)x17 * x49) + (((uint128_t)x19 * x47) + (((uint128_t)x21 * x45) + (((uint128_t)x23 * x43) + (((uint128_t)x25 * x41) + (((uint128_t)x27 * x39) + (((uint128_t)x29 * x37) + ((uint128_t)x31 * x35)))))))))))))) + (0x11 * (((uint128_t)x33 * x62) + ((uint128_t)x32 * x63))));
+ uint128_t x67 = ((((uint128_t)x5 * x59) + (((uint128_t)0x2 * (x7 * x57)) + (((uint128_t)x9 * x55) + (((uint128_t)0x2 * (x11 * x53)) + (((uint128_t)x13 * x51) + (((uint128_t)0x2 * (x15 * x49)) + (((uint128_t)x17 * x47) + (((uint128_t)0x2 * (x19 * x45)) + (((uint128_t)x21 * x43) + (((uint128_t)0x2 * (x23 * x41)) + (((uint128_t)x25 * x39) + (((uint128_t)0x2 * (x27 * x37)) + ((uint128_t)x29 * x35))))))))))))) + (0x11 * (((uint128_t)0x2 * (x31 * x62)) + (((uint128_t)x33 * x63) + ((uint128_t)0x2 * (x32 * x61))))));
+ uint128_t x68 = ((((uint128_t)x5 * x57) + (((uint128_t)x7 * x55) + (((uint128_t)x9 * x53) + (((uint128_t)x11 * x51) + (((uint128_t)x13 * x49) + (((uint128_t)x15 * x47) + (((uint128_t)x17 * x45) + (((uint128_t)x19 * x43) + (((uint128_t)x21 * x41) + (((uint128_t)x23 * x39) + (((uint128_t)x25 * x37) + ((uint128_t)x27 * x35)))))))))))) + (0x11 * (((uint128_t)x29 * x62) + (((uint128_t)x31 * x63) + (((uint128_t)x33 * x61) + ((uint128_t)x32 * x59))))));
+ uint128_t x69 = ((((uint128_t)x5 * x55) + (((uint128_t)0x2 * (x7 * x53)) + (((uint128_t)x9 * x51) + (((uint128_t)0x2 * (x11 * x49)) + (((uint128_t)x13 * x47) + (((uint128_t)0x2 * (x15 * x45)) + (((uint128_t)x17 * x43) + (((uint128_t)0x2 * (x19 * x41)) + (((uint128_t)x21 * x39) + (((uint128_t)0x2 * (x23 * x37)) + ((uint128_t)x25 * x35))))))))))) + (0x11 * (((uint128_t)0x2 * (x27 * x62)) + (((uint128_t)x29 * x63) + (((uint128_t)0x2 * (x31 * x61)) + (((uint128_t)x33 * x59) + ((uint128_t)0x2 * (x32 * x57))))))));
+ uint128_t x70 = ((((uint128_t)x5 * x53) + (((uint128_t)x7 * x51) + (((uint128_t)x9 * x49) + (((uint128_t)x11 * x47) + (((uint128_t)x13 * x45) + (((uint128_t)x15 * x43) + (((uint128_t)x17 * x41) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x23 * x35)))))))))) + (0x11 * (((uint128_t)x25 * x62) + (((uint128_t)x27 * x63) + (((uint128_t)x29 * x61) + (((uint128_t)x31 * x59) + (((uint128_t)x33 * x57) + ((uint128_t)x32 * x55))))))));
+ uint128_t x71 = ((((uint128_t)x5 * x51) + (((uint128_t)0x2 * (x7 * x49)) + (((uint128_t)x9 * x47) + (((uint128_t)0x2 * (x11 * x45)) + (((uint128_t)x13 * x43) + (((uint128_t)0x2 * (x15 * x41)) + (((uint128_t)x17 * x39) + (((uint128_t)0x2 * (x19 * x37)) + ((uint128_t)x21 * x35))))))))) + (0x11 * (((uint128_t)0x2 * (x23 * x62)) + (((uint128_t)x25 * x63) + (((uint128_t)0x2 * (x27 * x61)) + (((uint128_t)x29 * x59) + (((uint128_t)0x2 * (x31 * x57)) + (((uint128_t)x33 * x55) + ((uint128_t)0x2 * (x32 * x53))))))))));
+ uint128_t x72 = ((((uint128_t)x5 * x49) + (((uint128_t)x7 * x47) + (((uint128_t)x9 * x45) + (((uint128_t)x11 * x43) + (((uint128_t)x13 * x41) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + ((uint128_t)x19 * x35)))))))) + (0x11 * (((uint128_t)x21 * x62) + (((uint128_t)x23 * x63) + (((uint128_t)x25 * x61) + (((uint128_t)x27 * x59) + (((uint128_t)x29 * x57) + (((uint128_t)x31 * x55) + (((uint128_t)x33 * x53) + ((uint128_t)x32 * x51))))))))));
+ uint128_t x73 = ((((uint128_t)x5 * x47) + (((uint128_t)0x2 * (x7 * x45)) + (((uint128_t)x9 * x43) + (((uint128_t)0x2 * (x11 * x41)) + (((uint128_t)x13 * x39) + (((uint128_t)0x2 * (x15 * x37)) + ((uint128_t)x17 * x35))))))) + (0x11 * (((uint128_t)0x2 * (x19 * x62)) + (((uint128_t)x21 * x63) + (((uint128_t)0x2 * (x23 * x61)) + (((uint128_t)x25 * x59) + (((uint128_t)0x2 * (x27 * x57)) + (((uint128_t)x29 * x55) + (((uint128_t)0x2 * (x31 * x53)) + (((uint128_t)x33 * x51) + ((uint128_t)0x2 * (x32 * x49))))))))))));
+ uint128_t x74 = ((((uint128_t)x5 * x45) + (((uint128_t)x7 * x43) + (((uint128_t)x9 * x41) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + ((uint128_t)x15 * x35)))))) + (0x11 * (((uint128_t)x17 * x62) + (((uint128_t)x19 * x63) + (((uint128_t)x21 * x61) + (((uint128_t)x23 * x59) + (((uint128_t)x25 * x57) + (((uint128_t)x27 * x55) + (((uint128_t)x29 * x53) + (((uint128_t)x31 * x51) + (((uint128_t)x33 * x49) + ((uint128_t)x32 * x47))))))))))));
+ uint128_t x75 = ((((uint128_t)x5 * x43) + (((uint128_t)0x2 * (x7 * x41)) + (((uint128_t)x9 * x39) + (((uint128_t)0x2 * (x11 * x37)) + ((uint128_t)x13 * x35))))) + (0x11 * (((uint128_t)0x2 * (x15 * x62)) + (((uint128_t)x17 * x63) + (((uint128_t)0x2 * (x19 * x61)) + (((uint128_t)x21 * x59) + (((uint128_t)0x2 * (x23 * x57)) + (((uint128_t)x25 * x55) + (((uint128_t)0x2 * (x27 * x53)) + (((uint128_t)x29 * x51) + (((uint128_t)0x2 * (x31 * x49)) + (((uint128_t)x33 * x47) + ((uint128_t)0x2 * (x32 * x45))))))))))))));
+ uint128_t x76 = ((((uint128_t)x5 * x41) + (((uint128_t)x7 * x39) + (((uint128_t)x9 * x37) + ((uint128_t)x11 * x35)))) + (0x11 * (((uint128_t)x13 * x62) + (((uint128_t)x15 * x63) + (((uint128_t)x17 * x61) + (((uint128_t)x19 * x59) + (((uint128_t)x21 * x57) + (((uint128_t)x23 * x55) + (((uint128_t)x25 * x53) + (((uint128_t)x27 * x51) + (((uint128_t)x29 * x49) + (((uint128_t)x31 * x47) + (((uint128_t)x33 * x45) + ((uint128_t)x32 * x43))))))))))))));
+ uint128_t x77 = ((((uint128_t)x5 * x39) + (((uint128_t)0x2 * (x7 * x37)) + ((uint128_t)x9 * x35))) + (0x11 * (((uint128_t)0x2 * (x11 * x62)) + (((uint128_t)x13 * x63) + (((uint128_t)0x2 * (x15 * x61)) + (((uint128_t)x17 * x59) + (((uint128_t)0x2 * (x19 * x57)) + (((uint128_t)x21 * x55) + (((uint128_t)0x2 * (x23 * x53)) + (((uint128_t)x25 * x51) + (((uint128_t)0x2 * (x27 * x49)) + (((uint128_t)x29 * x47) + (((uint128_t)0x2 * (x31 * x45)) + (((uint128_t)x33 * x43) + ((uint128_t)0x2 * (x32 * x41))))))))))))))));
+ uint128_t x78 = ((((uint128_t)x5 * x37) + ((uint128_t)x7 * x35)) + (0x11 * (((uint128_t)x9 * x62) + (((uint128_t)x11 * x63) + (((uint128_t)x13 * x61) + (((uint128_t)x15 * x59) + (((uint128_t)x17 * x57) + (((uint128_t)x19 * x55) + (((uint128_t)x21 * x53) + (((uint128_t)x23 * x51) + (((uint128_t)x25 * x49) + (((uint128_t)x27 * x47) + (((uint128_t)x29 * x45) + (((uint128_t)x31 * x43) + (((uint128_t)x33 * x41) + ((uint128_t)x32 * x39))))))))))))))));
+ uint128_t x79 = (((uint128_t)x5 * x35) + (0x11 * (((uint128_t)0x2 * (x7 * x62)) + (((uint128_t)x9 * x63) + (((uint128_t)0x2 * (x11 * x61)) + (((uint128_t)x13 * x59) + (((uint128_t)0x2 * (x15 * x57)) + (((uint128_t)x17 * x55) + (((uint128_t)0x2 * (x19 * x53)) + (((uint128_t)x21 * x51) + (((uint128_t)0x2 * (x23 * x49)) + (((uint128_t)x25 * x47) + (((uint128_t)0x2 * (x27 * x45)) + (((uint128_t)x29 * x43) + (((uint128_t)0x2 * (x31 * x41)) + (((uint128_t)x33 * x39) + ((uint128_t)0x2 * (x32 * x37))))))))))))))))));
+ uint64_t x80 = (uint64_t) (x79 >> 0x1f);
+ uint64_t x81 = ((uint64_t)x79 & 0x7fffffff);
+ uint128_t x82 = (x80 + x78);
+ uint64_t x83 = (uint64_t) (x82 >> 0x1e);
+ uint64_t x84 = ((uint64_t)x82 & 0x3fffffff);
+ uint128_t x85 = (x83 + x77);
+ uint64_t x86 = (uint64_t) (x85 >> 0x1f);
+ uint64_t x87 = ((uint64_t)x85 & 0x7fffffff);
+ uint128_t x88 = (x86 + x76);
+ uint64_t x89 = (uint64_t) (x88 >> 0x1e);
+ uint64_t x90 = ((uint64_t)x88 & 0x3fffffff);
+ uint128_t x91 = (x89 + x75);
+ uint64_t x92 = (uint64_t) (x91 >> 0x1f);
+ uint64_t x93 = ((uint64_t)x91 & 0x7fffffff);
+ uint128_t x94 = (x92 + x74);
+ uint64_t x95 = (uint64_t) (x94 >> 0x1e);
+ uint64_t x96 = ((uint64_t)x94 & 0x3fffffff);
+ uint128_t x97 = (x95 + x73);
+ uint64_t x98 = (uint64_t) (x97 >> 0x1f);
+ uint64_t x99 = ((uint64_t)x97 & 0x7fffffff);
+ uint128_t x100 = (x98 + x72);
+ uint64_t x101 = (uint64_t) (x100 >> 0x1e);
+ uint64_t x102 = ((uint64_t)x100 & 0x3fffffff);
+ uint128_t x103 = (x101 + x71);
+ uint64_t x104 = (uint64_t) (x103 >> 0x1f);
+ uint64_t x105 = ((uint64_t)x103 & 0x7fffffff);
+ uint128_t x106 = (x104 + x70);
+ uint64_t x107 = (uint64_t) (x106 >> 0x1e);
+ uint64_t x108 = ((uint64_t)x106 & 0x3fffffff);
+ uint128_t x109 = (x107 + x69);
+ uint64_t x110 = (uint64_t) (x109 >> 0x1f);
+ uint64_t x111 = ((uint64_t)x109 & 0x7fffffff);
+ uint128_t x112 = (x110 + x68);
+ uint64_t x113 = (uint64_t) (x112 >> 0x1e);
+ uint64_t x114 = ((uint64_t)x112 & 0x3fffffff);
+ uint128_t x115 = (x113 + x67);
+ uint64_t x116 = (uint64_t) (x115 >> 0x1f);
+ uint64_t x117 = ((uint64_t)x115 & 0x7fffffff);
+ uint128_t x118 = (x116 + x66);
+ uint64_t x119 = (uint64_t) (x118 >> 0x1e);
+ uint64_t x120 = ((uint64_t)x118 & 0x3fffffff);
+ uint128_t x121 = (x119 + x65);
+ uint64_t x122 = (uint64_t) (x121 >> 0x1f);
+ uint64_t x123 = ((uint64_t)x121 & 0x7fffffff);
+ uint128_t x124 = (x122 + x64);
+ uint64_t x125 = (uint64_t) (x124 >> 0x1e);
+ uint64_t x126 = ((uint64_t)x124 & 0x3fffffff);
+ uint64_t x127 = (x81 + (0x11 * x125));
+ uint64_t x128 = (x127 >> 0x1f);
+ uint64_t x129 = (x127 & 0x7fffffff);
+ uint64_t x130 = (x128 + x84);
+ uint64_t x131 = (x130 >> 0x1e);
+ uint64_t x132 = (x130 & 0x3fffffff);
+ return (Return x126, Return x123, Return x120, Return x117, Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, Return x93, Return x90, (x131 + x87), Return x132, Return x129))
+(x, x0)%core
+ : word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 → word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 → ReturnType (uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t)
diff --git a/src/Specific/solinas64_2e488m17/fesquare.c b/src/Specific/solinas64_2e488m17/fesquare.c
new file mode 100644
index 000000000..ff95eaf88
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/fesquare.c
@@ -0,0 +1,106 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares a field element passed as 16 limbs and writes 16 carried limbs to out. Judging by the product indices below, x2 is the lowest-order limb and x29 the highest (order: x2, x4, ..., x28, x30, x29).
{ uint128_t x31 = (((uint128_t)x2 * x29) + (((uint128_t)x4 * x30) + (((uint128_t)x6 * x28) + (((uint128_t)x8 * x26) + (((uint128_t)x10 * x24) + (((uint128_t)x12 * x22) + (((uint128_t)x14 * x20) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + (((uint128_t)x20 * x14) + (((uint128_t)x22 * x12) + (((uint128_t)x24 * x10) + (((uint128_t)x26 * x8) + (((uint128_t)x28 * x6) + (((uint128_t)x30 * x4) + ((uint128_t)x29 * x2)))))))))))))))); // x31..x46 are 128-bit column sums of limb products; x31 is the highest column and x46 the lowest.
{ uint128_t x32 = ((((uint128_t)x2 * x30) + (((uint128_t)0x2 * (x4 * x28)) + (((uint128_t)x6 * x26) + (((uint128_t)0x2 * (x8 * x24)) + (((uint128_t)x10 * x22) + (((uint128_t)0x2 * (x12 * x20)) + (((uint128_t)x14 * x18) + (((uint128_t)0x2 * (x16 * x16)) + (((uint128_t)x18 * x14) + (((uint128_t)0x2 * (x20 * x12)) + (((uint128_t)x22 * x10) + (((uint128_t)0x2 * (x24 * x8)) + (((uint128_t)x26 * x6) + (((uint128_t)0x2 * (x28 * x4)) + ((uint128_t)x30 * x2))))))))))))))) + (0x11 * ((uint128_t)0x2 * (x29 * x29)))); // Products that wrap past the top limb are folded back scaled by 0x11 (17); presumably reduction modulo 2^488 - 17, per the directory name. NOTE(review): in the 0x2 terms the cast applies to the constant, so the parenthesised (a * b) is evaluated in uint64_t before widening; this relies on limbs being small enough that the product cannot wrap 64 bits - confirm against the generator's input bounds.
{ uint128_t x33 = ((((uint128_t)x2 * x28) + (((uint128_t)x4 * x26) + (((uint128_t)x6 * x24) + (((uint128_t)x8 * x22) + (((uint128_t)x10 * x20) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + (((uint128_t)x20 * x10) + (((uint128_t)x22 * x8) + (((uint128_t)x24 * x6) + (((uint128_t)x26 * x4) + ((uint128_t)x28 * x2)))))))))))))) + (0x11 * (((uint128_t)x30 * x29) + ((uint128_t)x29 * x30))));
{ uint128_t x34 = ((((uint128_t)x2 * x26) + (((uint128_t)0x2 * (x4 * x24)) + (((uint128_t)x6 * x22) + (((uint128_t)0x2 * (x8 * x20)) + (((uint128_t)x10 * x18) + (((uint128_t)0x2 * (x12 * x16)) + (((uint128_t)x14 * x14) + (((uint128_t)0x2 * (x16 * x12)) + (((uint128_t)x18 * x10) + (((uint128_t)0x2 * (x20 * x8)) + (((uint128_t)x22 * x6) + (((uint128_t)0x2 * (x24 * x4)) + ((uint128_t)x26 * x2))))))))))))) + (0x11 * (((uint128_t)0x2 * (x28 * x29)) + (((uint128_t)x30 * x30) + ((uint128_t)0x2 * (x29 * x28))))));
{ uint128_t x35 = ((((uint128_t)x2 * x24) + (((uint128_t)x4 * x22) + (((uint128_t)x6 * x20) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + (((uint128_t)x20 * x6) + (((uint128_t)x22 * x4) + ((uint128_t)x24 * x2)))))))))))) + (0x11 * (((uint128_t)x26 * x29) + (((uint128_t)x28 * x30) + (((uint128_t)x30 * x28) + ((uint128_t)x29 * x26))))));
{ uint128_t x36 = ((((uint128_t)x2 * x22) + (((uint128_t)0x2 * (x4 * x20)) + (((uint128_t)x6 * x18) + (((uint128_t)0x2 * (x8 * x16)) + (((uint128_t)x10 * x14) + (((uint128_t)0x2 * (x12 * x12)) + (((uint128_t)x14 * x10) + (((uint128_t)0x2 * (x16 * x8)) + (((uint128_t)x18 * x6) + (((uint128_t)0x2 * (x20 * x4)) + ((uint128_t)x22 * x2))))))))))) + (0x11 * (((uint128_t)0x2 * (x24 * x29)) + (((uint128_t)x26 * x30) + (((uint128_t)0x2 * (x28 * x28)) + (((uint128_t)x30 * x26) + ((uint128_t)0x2 * (x29 * x24))))))));
{ uint128_t x37 = ((((uint128_t)x2 * x20) + (((uint128_t)x4 * x18) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + (((uint128_t)x18 * x4) + ((uint128_t)x20 * x2)))))))))) + (0x11 * (((uint128_t)x22 * x29) + (((uint128_t)x24 * x30) + (((uint128_t)x26 * x28) + (((uint128_t)x28 * x26) + (((uint128_t)x30 * x24) + ((uint128_t)x29 * x22))))))));
{ uint128_t x38 = ((((uint128_t)x2 * x18) + (((uint128_t)0x2 * (x4 * x16)) + (((uint128_t)x6 * x14) + (((uint128_t)0x2 * (x8 * x12)) + (((uint128_t)x10 * x10) + (((uint128_t)0x2 * (x12 * x8)) + (((uint128_t)x14 * x6) + (((uint128_t)0x2 * (x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x11 * (((uint128_t)0x2 * (x20 * x29)) + (((uint128_t)x22 * x30) + (((uint128_t)0x2 * (x24 * x28)) + (((uint128_t)x26 * x26) + (((uint128_t)0x2 * (x28 * x24)) + (((uint128_t)x30 * x22) + ((uint128_t)0x2 * (x29 * x20))))))))));
{ uint128_t x39 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x11 * (((uint128_t)x18 * x29) + (((uint128_t)x20 * x30) + (((uint128_t)x22 * x28) + (((uint128_t)x24 * x26) + (((uint128_t)x26 * x24) + (((uint128_t)x28 * x22) + (((uint128_t)x30 * x20) + ((uint128_t)x29 * x18))))))))));
{ uint128_t x40 = ((((uint128_t)x2 * x14) + (((uint128_t)0x2 * (x4 * x12)) + (((uint128_t)x6 * x10) + (((uint128_t)0x2 * (x8 * x8)) + (((uint128_t)x10 * x6) + (((uint128_t)0x2 * (x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (((uint128_t)0x2 * (x16 * x29)) + (((uint128_t)x18 * x30) + (((uint128_t)0x2 * (x20 * x28)) + (((uint128_t)x22 * x26) + (((uint128_t)0x2 * (x24 * x24)) + (((uint128_t)x26 * x22) + (((uint128_t)0x2 * (x28 * x20)) + (((uint128_t)x30 * x18) + ((uint128_t)0x2 * (x29 * x16))))))))))));
{ uint128_t x41 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x29) + (((uint128_t)x16 * x30) + (((uint128_t)x18 * x28) + (((uint128_t)x20 * x26) + (((uint128_t)x22 * x24) + (((uint128_t)x24 * x22) + (((uint128_t)x26 * x20) + (((uint128_t)x28 * x18) + (((uint128_t)x30 * x16) + ((uint128_t)x29 * x14))))))))))));
{ uint128_t x42 = ((((uint128_t)x2 * x10) + (((uint128_t)0x2 * (x4 * x8)) + (((uint128_t)x6 * x6) + (((uint128_t)0x2 * (x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * (((uint128_t)0x2 * (x12 * x29)) + (((uint128_t)x14 * x30) + (((uint128_t)0x2 * (x16 * x28)) + (((uint128_t)x18 * x26) + (((uint128_t)0x2 * (x20 * x24)) + (((uint128_t)x22 * x22) + (((uint128_t)0x2 * (x24 * x20)) + (((uint128_t)x26 * x18) + (((uint128_t)0x2 * (x28 * x16)) + (((uint128_t)x30 * x14) + ((uint128_t)0x2 * (x29 * x12))))))))))))));
{ uint128_t x43 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x29) + (((uint128_t)x12 * x30) + (((uint128_t)x14 * x28) + (((uint128_t)x16 * x26) + (((uint128_t)x18 * x24) + (((uint128_t)x20 * x22) + (((uint128_t)x22 * x20) + (((uint128_t)x24 * x18) + (((uint128_t)x26 * x16) + (((uint128_t)x28 * x14) + (((uint128_t)x30 * x12) + ((uint128_t)x29 * x10))))))))))))));
{ uint128_t x44 = ((((uint128_t)x2 * x6) + (((uint128_t)0x2 * (x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)0x2 * (x8 * x29)) + (((uint128_t)x10 * x30) + (((uint128_t)0x2 * (x12 * x28)) + (((uint128_t)x14 * x26) + (((uint128_t)0x2 * (x16 * x24)) + (((uint128_t)x18 * x22) + (((uint128_t)0x2 * (x20 * x20)) + (((uint128_t)x22 * x18) + (((uint128_t)0x2 * (x24 * x16)) + (((uint128_t)x26 * x14) + (((uint128_t)0x2 * (x28 * x12)) + (((uint128_t)x30 * x10) + ((uint128_t)0x2 * (x29 * x8))))))))))))))));
{ uint128_t x45 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x29) + (((uint128_t)x8 * x30) + (((uint128_t)x10 * x28) + (((uint128_t)x12 * x26) + (((uint128_t)x14 * x24) + (((uint128_t)x16 * x22) + (((uint128_t)x18 * x20) + (((uint128_t)x20 * x18) + (((uint128_t)x22 * x16) + (((uint128_t)x24 * x14) + (((uint128_t)x26 * x12) + (((uint128_t)x28 * x10) + (((uint128_t)x30 * x8) + ((uint128_t)x29 * x6))))))))))))))));
{ uint128_t x46 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)0x2 * (x4 * x29)) + (((uint128_t)x6 * x30) + (((uint128_t)0x2 * (x8 * x28)) + (((uint128_t)x10 * x26) + (((uint128_t)0x2 * (x12 * x24)) + (((uint128_t)x14 * x22) + (((uint128_t)0x2 * (x16 * x20)) + (((uint128_t)x18 * x18) + (((uint128_t)0x2 * (x20 * x16)) + (((uint128_t)x22 * x14) + (((uint128_t)0x2 * (x24 * x12)) + (((uint128_t)x26 * x10) + (((uint128_t)0x2 * (x28 * x8)) + (((uint128_t)x30 * x6) + ((uint128_t)0x2 * (x29 * x4))))))))))))))))));
{ uint64_t x47 = (uint64_t) (x46 >> 0x1f); // Carry chain, lowest column first: the bits above the limb width become the carry into the next column.
{ uint64_t x48 = ((uint64_t)x46 & 0x7fffffff); // Limb widths alternate: 31-bit mask (0x7fffffff) here, 30-bit mask (0x3fffffff) on the next column, and so on.
{ uint128_t x49 = (x47 + x45);
{ uint64_t x50 = (uint64_t) (x49 >> 0x1e);
{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffff);
{ uint128_t x52 = (x50 + x44);
{ uint64_t x53 = (uint64_t) (x52 >> 0x1f);
{ uint64_t x54 = ((uint64_t)x52 & 0x7fffffff);
{ uint128_t x55 = (x53 + x43);
{ uint64_t x56 = (uint64_t) (x55 >> 0x1e);
{ uint64_t x57 = ((uint64_t)x55 & 0x3fffffff);
{ uint128_t x58 = (x56 + x42);
{ uint64_t x59 = (uint64_t) (x58 >> 0x1f);
{ uint64_t x60 = ((uint64_t)x58 & 0x7fffffff);
{ uint128_t x61 = (x59 + x41);
{ uint64_t x62 = (uint64_t) (x61 >> 0x1e);
{ uint64_t x63 = ((uint64_t)x61 & 0x3fffffff);
{ uint128_t x64 = (x62 + x40);
{ uint64_t x65 = (uint64_t) (x64 >> 0x1f);
{ uint64_t x66 = ((uint64_t)x64 & 0x7fffffff);
{ uint128_t x67 = (x65 + x39);
{ uint64_t x68 = (uint64_t) (x67 >> 0x1e);
{ uint64_t x69 = ((uint64_t)x67 & 0x3fffffff);
{ uint128_t x70 = (x68 + x38);
{ uint64_t x71 = (uint64_t) (x70 >> 0x1f);
{ uint64_t x72 = ((uint64_t)x70 & 0x7fffffff);
{ uint128_t x73 = (x71 + x37);
{ uint64_t x74 = (uint64_t) (x73 >> 0x1e);
{ uint64_t x75 = ((uint64_t)x73 & 0x3fffffff);
{ uint128_t x76 = (x74 + x36);
{ uint64_t x77 = (uint64_t) (x76 >> 0x1f);
{ uint64_t x78 = ((uint64_t)x76 & 0x7fffffff);
{ uint128_t x79 = (x77 + x35);
{ uint64_t x80 = (uint64_t) (x79 >> 0x1e);
{ uint64_t x81 = ((uint64_t)x79 & 0x3fffffff);
{ uint128_t x82 = (x80 + x34);
{ uint64_t x83 = (uint64_t) (x82 >> 0x1f);
{ uint64_t x84 = ((uint64_t)x82 & 0x7fffffff);
{ uint128_t x85 = (x83 + x33);
{ uint64_t x86 = (uint64_t) (x85 >> 0x1e);
{ uint64_t x87 = ((uint64_t)x85 & 0x3fffffff);
{ uint128_t x88 = (x86 + x32);
{ uint64_t x89 = (uint64_t) (x88 >> 0x1f);
{ uint64_t x90 = ((uint64_t)x88 & 0x7fffffff);
{ uint128_t x91 = (x89 + x31);
{ uint64_t x92 = (uint64_t) (x91 >> 0x1e);
{ uint64_t x93 = ((uint64_t)x91 & 0x3fffffff);
{ uint64_t x94 = (x48 + (0x11 * x92)); // The carry out of the top column wraps around to the lowest limb, scaled by 0x11.
{ uint64_t x95 = (x94 >> 0x1f); // Short second carry pass over the two lowest limbs.
{ uint64_t x96 = (x94 & 0x7fffffff);
{ uint64_t x97 = (x95 + x51);
{ uint64_t x98 = (x97 >> 0x1e);
{ uint64_t x99 = (x97 & 0x3fffffff);
+out[0] = x93; // Limbs are stored highest column first: out[0] comes from x31's column, out[15] from x46's.
+out[1] = x90;
+out[2] = x87;
+out[3] = x84;
+out[4] = x81;
+out[5] = x78;
+out[6] = x75;
+out[7] = x72;
+out[8] = x69;
+out[9] = x66;
+out[10] = x63;
+out[11] = x60;
+out[12] = x57;
+out[13] = x98 + x54; // The last carry is added without another mask, so this limb is not re-masked here.
+out[14] = x99;
+out[15] = x96;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[16];
diff --git a/src/Specific/solinas64_2e488m17/fesquare.h b/src/Specific/solinas64_2e488m17/fesquare.h
new file mode 100644
index 000000000..c86247b3d
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Squares the field element given by the 16 limb arguments; the definition in fesquare.c writes out[0]..out[15], so the caller must supply uint64_t out[16].
diff --git a/src/Specific/solinas64_2e488m17/fesquareDisplay.log b/src/Specific/solinas64_2e488m17/fesquareDisplay.log
new file mode 100644
index 000000000..be98a5058
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/fesquareDisplay.log
@@ -0,0 +1,76 @@
+λ x : word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ uint128_t x31 = (((uint128_t)x2 * x29) + (((uint128_t)x4 * x30) + (((uint128_t)x6 * x28) + (((uint128_t)x8 * x26) + (((uint128_t)x10 * x24) + (((uint128_t)x12 * x22) + (((uint128_t)x14 * x20) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + (((uint128_t)x20 * x14) + (((uint128_t)x22 * x12) + (((uint128_t)x24 * x10) + (((uint128_t)x26 * x8) + (((uint128_t)x28 * x6) + (((uint128_t)x30 * x4) + ((uint128_t)x29 * x2))))))))))))))));
+ uint128_t x32 = ((((uint128_t)x2 * x30) + (((uint128_t)0x2 * (x4 * x28)) + (((uint128_t)x6 * x26) + (((uint128_t)0x2 * (x8 * x24)) + (((uint128_t)x10 * x22) + (((uint128_t)0x2 * (x12 * x20)) + (((uint128_t)x14 * x18) + (((uint128_t)0x2 * (x16 * x16)) + (((uint128_t)x18 * x14) + (((uint128_t)0x2 * (x20 * x12)) + (((uint128_t)x22 * x10) + (((uint128_t)0x2 * (x24 * x8)) + (((uint128_t)x26 * x6) + (((uint128_t)0x2 * (x28 * x4)) + ((uint128_t)x30 * x2))))))))))))))) + (0x11 * ((uint128_t)0x2 * (x29 * x29))));
+ uint128_t x33 = ((((uint128_t)x2 * x28) + (((uint128_t)x4 * x26) + (((uint128_t)x6 * x24) + (((uint128_t)x8 * x22) + (((uint128_t)x10 * x20) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + (((uint128_t)x20 * x10) + (((uint128_t)x22 * x8) + (((uint128_t)x24 * x6) + (((uint128_t)x26 * x4) + ((uint128_t)x28 * x2)))))))))))))) + (0x11 * (((uint128_t)x30 * x29) + ((uint128_t)x29 * x30))));
+ uint128_t x34 = ((((uint128_t)x2 * x26) + (((uint128_t)0x2 * (x4 * x24)) + (((uint128_t)x6 * x22) + (((uint128_t)0x2 * (x8 * x20)) + (((uint128_t)x10 * x18) + (((uint128_t)0x2 * (x12 * x16)) + (((uint128_t)x14 * x14) + (((uint128_t)0x2 * (x16 * x12)) + (((uint128_t)x18 * x10) + (((uint128_t)0x2 * (x20 * x8)) + (((uint128_t)x22 * x6) + (((uint128_t)0x2 * (x24 * x4)) + ((uint128_t)x26 * x2))))))))))))) + (0x11 * (((uint128_t)0x2 * (x28 * x29)) + (((uint128_t)x30 * x30) + ((uint128_t)0x2 * (x29 * x28))))));
+ uint128_t x35 = ((((uint128_t)x2 * x24) + (((uint128_t)x4 * x22) + (((uint128_t)x6 * x20) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + (((uint128_t)x20 * x6) + (((uint128_t)x22 * x4) + ((uint128_t)x24 * x2)))))))))))) + (0x11 * (((uint128_t)x26 * x29) + (((uint128_t)x28 * x30) + (((uint128_t)x30 * x28) + ((uint128_t)x29 * x26))))));
+ uint128_t x36 = ((((uint128_t)x2 * x22) + (((uint128_t)0x2 * (x4 * x20)) + (((uint128_t)x6 * x18) + (((uint128_t)0x2 * (x8 * x16)) + (((uint128_t)x10 * x14) + (((uint128_t)0x2 * (x12 * x12)) + (((uint128_t)x14 * x10) + (((uint128_t)0x2 * (x16 * x8)) + (((uint128_t)x18 * x6) + (((uint128_t)0x2 * (x20 * x4)) + ((uint128_t)x22 * x2))))))))))) + (0x11 * (((uint128_t)0x2 * (x24 * x29)) + (((uint128_t)x26 * x30) + (((uint128_t)0x2 * (x28 * x28)) + (((uint128_t)x30 * x26) + ((uint128_t)0x2 * (x29 * x24))))))));
+ uint128_t x37 = ((((uint128_t)x2 * x20) + (((uint128_t)x4 * x18) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + (((uint128_t)x18 * x4) + ((uint128_t)x20 * x2)))))))))) + (0x11 * (((uint128_t)x22 * x29) + (((uint128_t)x24 * x30) + (((uint128_t)x26 * x28) + (((uint128_t)x28 * x26) + (((uint128_t)x30 * x24) + ((uint128_t)x29 * x22))))))));
+ uint128_t x38 = ((((uint128_t)x2 * x18) + (((uint128_t)0x2 * (x4 * x16)) + (((uint128_t)x6 * x14) + (((uint128_t)0x2 * (x8 * x12)) + (((uint128_t)x10 * x10) + (((uint128_t)0x2 * (x12 * x8)) + (((uint128_t)x14 * x6) + (((uint128_t)0x2 * (x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x11 * (((uint128_t)0x2 * (x20 * x29)) + (((uint128_t)x22 * x30) + (((uint128_t)0x2 * (x24 * x28)) + (((uint128_t)x26 * x26) + (((uint128_t)0x2 * (x28 * x24)) + (((uint128_t)x30 * x22) + ((uint128_t)0x2 * (x29 * x20))))))))));
+ uint128_t x39 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x11 * (((uint128_t)x18 * x29) + (((uint128_t)x20 * x30) + (((uint128_t)x22 * x28) + (((uint128_t)x24 * x26) + (((uint128_t)x26 * x24) + (((uint128_t)x28 * x22) + (((uint128_t)x30 * x20) + ((uint128_t)x29 * x18))))))))));
+ uint128_t x40 = ((((uint128_t)x2 * x14) + (((uint128_t)0x2 * (x4 * x12)) + (((uint128_t)x6 * x10) + (((uint128_t)0x2 * (x8 * x8)) + (((uint128_t)x10 * x6) + (((uint128_t)0x2 * (x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x11 * (((uint128_t)0x2 * (x16 * x29)) + (((uint128_t)x18 * x30) + (((uint128_t)0x2 * (x20 * x28)) + (((uint128_t)x22 * x26) + (((uint128_t)0x2 * (x24 * x24)) + (((uint128_t)x26 * x22) + (((uint128_t)0x2 * (x28 * x20)) + (((uint128_t)x30 * x18) + ((uint128_t)0x2 * (x29 * x16))))))))))));
+ uint128_t x41 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x11 * (((uint128_t)x14 * x29) + (((uint128_t)x16 * x30) + (((uint128_t)x18 * x28) + (((uint128_t)x20 * x26) + (((uint128_t)x22 * x24) + (((uint128_t)x24 * x22) + (((uint128_t)x26 * x20) + (((uint128_t)x28 * x18) + (((uint128_t)x30 * x16) + ((uint128_t)x29 * x14))))))))))));
+ uint128_t x42 = ((((uint128_t)x2 * x10) + (((uint128_t)0x2 * (x4 * x8)) + (((uint128_t)x6 * x6) + (((uint128_t)0x2 * (x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x11 * (((uint128_t)0x2 * (x12 * x29)) + (((uint128_t)x14 * x30) + (((uint128_t)0x2 * (x16 * x28)) + (((uint128_t)x18 * x26) + (((uint128_t)0x2 * (x20 * x24)) + (((uint128_t)x22 * x22) + (((uint128_t)0x2 * (x24 * x20)) + (((uint128_t)x26 * x18) + (((uint128_t)0x2 * (x28 * x16)) + (((uint128_t)x30 * x14) + ((uint128_t)0x2 * (x29 * x12))))))))))))));
+ uint128_t x43 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x11 * (((uint128_t)x10 * x29) + (((uint128_t)x12 * x30) + (((uint128_t)x14 * x28) + (((uint128_t)x16 * x26) + (((uint128_t)x18 * x24) + (((uint128_t)x20 * x22) + (((uint128_t)x22 * x20) + (((uint128_t)x24 * x18) + (((uint128_t)x26 * x16) + (((uint128_t)x28 * x14) + (((uint128_t)x30 * x12) + ((uint128_t)x29 * x10))))))))))))));
+ uint128_t x44 = ((((uint128_t)x2 * x6) + (((uint128_t)0x2 * (x4 * x4)) + ((uint128_t)x6 * x2))) + (0x11 * (((uint128_t)0x2 * (x8 * x29)) + (((uint128_t)x10 * x30) + (((uint128_t)0x2 * (x12 * x28)) + (((uint128_t)x14 * x26) + (((uint128_t)0x2 * (x16 * x24)) + (((uint128_t)x18 * x22) + (((uint128_t)0x2 * (x20 * x20)) + (((uint128_t)x22 * x18) + (((uint128_t)0x2 * (x24 * x16)) + (((uint128_t)x26 * x14) + (((uint128_t)0x2 * (x28 * x12)) + (((uint128_t)x30 * x10) + ((uint128_t)0x2 * (x29 * x8))))))))))))))));
+ uint128_t x45 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x11 * (((uint128_t)x6 * x29) + (((uint128_t)x8 * x30) + (((uint128_t)x10 * x28) + (((uint128_t)x12 * x26) + (((uint128_t)x14 * x24) + (((uint128_t)x16 * x22) + (((uint128_t)x18 * x20) + (((uint128_t)x20 * x18) + (((uint128_t)x22 * x16) + (((uint128_t)x24 * x14) + (((uint128_t)x26 * x12) + (((uint128_t)x28 * x10) + (((uint128_t)x30 * x8) + ((uint128_t)x29 * x6))))))))))))))));
+ uint128_t x46 = (((uint128_t)x2 * x2) + (0x11 * (((uint128_t)0x2 * (x4 * x29)) + (((uint128_t)x6 * x30) + (((uint128_t)0x2 * (x8 * x28)) + (((uint128_t)x10 * x26) + (((uint128_t)0x2 * (x12 * x24)) + (((uint128_t)x14 * x22) + (((uint128_t)0x2 * (x16 * x20)) + (((uint128_t)x18 * x18) + (((uint128_t)0x2 * (x20 * x16)) + (((uint128_t)x22 * x14) + (((uint128_t)0x2 * (x24 * x12)) + (((uint128_t)x26 * x10) + (((uint128_t)0x2 * (x28 * x8)) + (((uint128_t)x30 * x6) + ((uint128_t)0x2 * (x29 * x4))))))))))))))))));
+ uint64_t x47 = (uint64_t) (x46 >> 0x1f);
+ uint64_t x48 = ((uint64_t)x46 & 0x7fffffff);
+ uint128_t x49 = (x47 + x45);
+ uint64_t x50 = (uint64_t) (x49 >> 0x1e);
+ uint64_t x51 = ((uint64_t)x49 & 0x3fffffff);
+ uint128_t x52 = (x50 + x44);
+ uint64_t x53 = (uint64_t) (x52 >> 0x1f);
+ uint64_t x54 = ((uint64_t)x52 & 0x7fffffff);
+ uint128_t x55 = (x53 + x43);
+ uint64_t x56 = (uint64_t) (x55 >> 0x1e);
+ uint64_t x57 = ((uint64_t)x55 & 0x3fffffff);
+ uint128_t x58 = (x56 + x42);
+ uint64_t x59 = (uint64_t) (x58 >> 0x1f);
+ uint64_t x60 = ((uint64_t)x58 & 0x7fffffff);
+ uint128_t x61 = (x59 + x41);
+ uint64_t x62 = (uint64_t) (x61 >> 0x1e);
+ uint64_t x63 = ((uint64_t)x61 & 0x3fffffff);
+ uint128_t x64 = (x62 + x40);
+ uint64_t x65 = (uint64_t) (x64 >> 0x1f);
+ uint64_t x66 = ((uint64_t)x64 & 0x7fffffff);
+ uint128_t x67 = (x65 + x39);
+ uint64_t x68 = (uint64_t) (x67 >> 0x1e);
+ uint64_t x69 = ((uint64_t)x67 & 0x3fffffff);
+ uint128_t x70 = (x68 + x38);
+ uint64_t x71 = (uint64_t) (x70 >> 0x1f);
+ uint64_t x72 = ((uint64_t)x70 & 0x7fffffff);
+ uint128_t x73 = (x71 + x37);
+ uint64_t x74 = (uint64_t) (x73 >> 0x1e);
+ uint64_t x75 = ((uint64_t)x73 & 0x3fffffff);
+ uint128_t x76 = (x74 + x36);
+ uint64_t x77 = (uint64_t) (x76 >> 0x1f);
+ uint64_t x78 = ((uint64_t)x76 & 0x7fffffff);
+ uint128_t x79 = (x77 + x35);
+ uint64_t x80 = (uint64_t) (x79 >> 0x1e);
+ uint64_t x81 = ((uint64_t)x79 & 0x3fffffff);
+ uint128_t x82 = (x80 + x34);
+ uint64_t x83 = (uint64_t) (x82 >> 0x1f);
+ uint64_t x84 = ((uint64_t)x82 & 0x7fffffff);
+ uint128_t x85 = (x83 + x33);
+ uint64_t x86 = (uint64_t) (x85 >> 0x1e);
+ uint64_t x87 = ((uint64_t)x85 & 0x3fffffff);
+ uint128_t x88 = (x86 + x32);
+ uint64_t x89 = (uint64_t) (x88 >> 0x1f);
+ uint64_t x90 = ((uint64_t)x88 & 0x7fffffff);
+ uint128_t x91 = (x89 + x31);
+ uint64_t x92 = (uint64_t) (x91 >> 0x1e);
+ uint64_t x93 = ((uint64_t)x91 & 0x3fffffff);
+ uint64_t x94 = (x48 + (0x11 * x92));
+ uint64_t x95 = (x94 >> 0x1f);
+ uint64_t x96 = (x94 & 0x7fffffff);
+ uint64_t x97 = (x95 + x51);
+ uint64_t x98 = (x97 >> 0x1e);
+ uint64_t x99 = (x97 & 0x3fffffff);
+ return (Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, Return x72, Return x69, Return x66, Return x63, Return x60, Return x57, (x98 + x54), Return x99, Return x96))
+x
+ : word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 → ReturnType (uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t)
diff --git a/src/Specific/solinas64_2e488m17/freeze.c b/src/Specific/solinas64_2e488m17/freeze.c
new file mode 100644
index 000000000..15e32ebf7
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace; the statements are fragments of the generator's intermediate form), so this file cannot compile as emitted. freezeDisplay.log in this directory shows the intended computation: a 16-step SubWithGetBorrow chain, a cmovznz mask, then a 16-step AddWithGetCarry chain returning 16 limbs. Regenerate rather than hand-patch.
+out[0] = uint64_t x32; // a declaration fragment, not an expression
+out[1] = uint8_t x33 = Op Syntax.SubWithGetBorrow 31 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // first line of the log's borrow chain with its parentheses stripped
+out[2] = x2;
+out[3] = Const 2147483631;; // 2147483631 == 0x7fffffff - 0x10 (2^31 - 17); `Const` is pretty-printer syntax, not C
+}
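+// caller: uint64_t out[4]; NOTE(review): the 4 only reflects the truncated body above; freezeDisplay.log returns 16 limbs, so a correctly regenerated freeze needs uint64_t out[16].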
diff --git a/src/Specific/solinas64_2e488m17/freeze.h b/src/Specific/solinas64_2e488m17/freeze.h
new file mode 100644
index 000000000..a955633b6
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x29, uint64_t x30, uint64_t x28, uint64_t x26, uint64_t x24, uint64_t x22, uint64_t x20, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Takes the 16 limbs of a field element; per freezeDisplay.log the result is 16 limbs, so out presumably needs 16 entries. NOTE(review): the definition in freeze.c is malformed generator output - confirm once it is regenerated.
diff --git a/src/Specific/solinas64_2e488m17/freezeDisplay.log b/src/Specific/solinas64_2e488m17/freezeDisplay.log
new file mode 100644
index 000000000..483ee09e1
--- /dev/null
+++ b/src/Specific/solinas64_2e488m17/freezeDisplay.log
@@ -0,0 +1,56 @@
+λ x : word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x29, x30, x28, x26, x24, x22, x20, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ uint64_t x32, uint8_t x33 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x2, Const 2147483631);
+ uint64_t x35, uint8_t x36 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x33, Return x4, 0x3fffffff);
+ uint64_t x38, uint8_t x39 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x36, Return x6, 0x7fffffff);
+ uint64_t x41, uint8_t x42 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x39, Return x8, 0x3fffffff);
+ uint64_t x44, uint8_t x45 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x42, Return x10, 0x7fffffff);
+ uint64_t x47, uint8_t x48 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x45, Return x12, 0x3fffffff);
+ uint64_t x50, uint8_t x51 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x48, Return x14, 0x7fffffff);
+ uint64_t x53, uint8_t x54 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x51, Return x16, 0x3fffffff);
+ uint64_t x56, uint8_t x57 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x54, Return x18, 0x7fffffff);
+ uint64_t x59, uint8_t x60 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x57, Return x20, 0x3fffffff);
+ uint64_t x62, uint8_t x63 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x60, Return x22, 0x7fffffff);
+ uint64_t x65, uint8_t x66 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x63, Return x24, 0x3fffffff);
+ uint64_t x68, uint8_t x69 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x66, Return x26, 0x7fffffff);
+ uint64_t x71, uint8_t x72 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x69, Return x28, 0x3fffffff);
+ uint64_t x74, uint8_t x75 = Op (Syntax.SubWithGetBorrow 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x72, Return x30, 0x7fffffff);
+ uint64_t x77, uint8_t x78 = Op (Syntax.SubWithGetBorrow 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x75, Return x29, 0x3fffffff);
+ uint64_t x79 = (uint64_t)cmovznz(x78, 0x0, 0xffffffffffffffffL);
+ uint64_t x80 = (x79 & Const 2147483631);
+ uint64_t x82, uint8_t x83 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (0x0, Return x32, Return x80);
+ uint64_t x84 = (x79 & 0x3fffffff);
+ uint64_t x86, uint8_t x87 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x83, Return x35, Return x84);
+ uint64_t x88 = (x79 & 0x7fffffff);
+ uint64_t x90, uint8_t x91 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x87, Return x38, Return x88);
+ uint64_t x92 = (x79 & 0x3fffffff);
+ uint64_t x94, uint8_t x95 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x91, Return x41, Return x92);
+ uint64_t x96 = (x79 & 0x7fffffff);
+ uint64_t x98, uint8_t x99 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x95, Return x44, Return x96);
+ uint64_t x100 = (x79 & 0x3fffffff);
+ uint64_t x102, uint8_t x103 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x99, Return x47, Return x100);
+ uint64_t x104 = (x79 & 0x7fffffff);
+ uint64_t x106, uint8_t x107 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x103, Return x50, Return x104);
+ uint64_t x108 = (x79 & 0x3fffffff);
+ uint64_t x110, uint8_t x111 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x107, Return x53, Return x108);
+ uint64_t x112 = (x79 & 0x7fffffff);
+ uint64_t x114, uint8_t x115 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x111, Return x56, Return x112);
+ uint64_t x116 = (x79 & 0x3fffffff);
+ uint64_t x118, uint8_t x119 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x115, Return x59, Return x116);
+ uint64_t x120 = (x79 & 0x7fffffff);
+ uint64_t x122, uint8_t x123 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x119, Return x62, Return x120);
+ uint64_t x124 = (x79 & 0x3fffffff);
+ uint64_t x126, uint8_t x127 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x123, Return x65, Return x124);
+ uint64_t x128 = (x79 & 0x7fffffff);
+ uint64_t x130, uint8_t x131 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x127, Return x68, Return x128);
+ uint64_t x132 = (x79 & 0x3fffffff);
+ uint64_t x134, uint8_t x135 = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x131, Return x71, Return x132);
+ uint64_t x136 = (x79 & 0x7fffffff);
+ uint64_t x138, uint8_t x139 = Op (Syntax.AddWithGetCarry 31 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x135, Return x74, Return x136);
+ uint64_t x140 = (x79 & 0x3fffffff);
+ uint64_t x142, uint8_t _ = Op (Syntax.AddWithGetCarry 30 (Syntax.TWord 3) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 6) (Syntax.TWord 3)) (Return x139, Return x77, Return x140);
+ (Return x142, Return x138, Return x134, Return x130, Return x126, Return x122, Return x118, Return x114, Return x110, Return x106, Return x102, Return x98, Return x94, Return x90, Return x86, Return x82))
+x
+ : word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 * word64 → ReturnType (uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t * uint64_t)
diff --git a/src/Specific/solinas64_2e489m21/femul.c b/src/Specific/solinas64_2e489m21/femul.c
new file mode 100644
index 000000000..411c9ef12
--- /dev/null
+++ b/src/Specific/solinas64_2e489m21/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21) // femul: multiplies the 9-limb value (x5 = limb 0, x7, x9, x11, x13, x15, x17, x19, x18 = limb 8) by (x21 = limb 0, x23, ..., x35, x34 = limb 8) and stores 9 result limbs in out, with out[8] holding limb 0 and out[0] holding limb 8. Presumably multiplication modulo 2^489 - 21 (directory name and the 0x15 factor) -- confirm against the generator.
+{ uint128_t x36 = (((uint128_t)x5 * x34) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + ((0x2 * ((uint128_t)x13 * x29)) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + ((0x2 * ((uint128_t)x19 * x23)) + ((uint128_t)x18 * x21))))))))); // x36 .. x44 are the 128-bit product coefficients for limb positions 8 down to 0; from x37 on, the products whose limb indices sum past 8 are scaled by 0x15.
+{ uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x15 * ((uint128_t)x18 * x34)));
+{ uint128_t x38 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + (((uint128_t)x11 * x27) + ((0x2 * ((uint128_t)x13 * x25)) + ((0x2 * ((uint128_t)x15 * x23)) + ((uint128_t)x17 * x21))))))) + (0x15 * ((0x2 * ((uint128_t)x19 * x34)) + (0x2 * ((uint128_t)x18 * x35)))));
+{ uint128_t x39 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + ((0x2 * ((uint128_t)x13 * x23)) + ((uint128_t)x15 * x21)))))) + (0x15 * (((uint128_t)x17 * x34) + ((0x2 * ((uint128_t)x19 * x35)) + ((uint128_t)x18 * x33)))));
+{ uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x15 * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
+{ uint128_t x41 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((0x2 * ((uint128_t)x9 * x23)) + ((uint128_t)x11 * x21)))) + (0x15 * ((0x2 * ((uint128_t)x13 * x34)) + ((0x2 * ((uint128_t)x15 * x35)) + (((uint128_t)x17 * x33) + ((0x2 * ((uint128_t)x19 * x31)) + (0x2 * ((uint128_t)x18 * x29))))))));
+{ uint128_t x42 = ((((uint128_t)x5 * x25) + ((0x2 * ((uint128_t)x7 * x23)) + ((uint128_t)x9 * x21))) + (0x15 * (((uint128_t)x11 * x34) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + ((0x2 * ((uint128_t)x19 * x29)) + ((uint128_t)x18 * x27))))))));
+{ uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x15 * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
+{ uint128_t x44 = (((uint128_t)x5 * x21) + (0x15 * ((0x2 * ((uint128_t)x7 * x34)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + (((uint128_t)x17 * x27) + ((0x2 * ((uint128_t)x19 * x25)) + (0x2 * ((uint128_t)x18 * x23)))))))))));
+{ uint128_t x45 = (x44 >> 0x37); // Carry chain, starting at limb 0 (x44): each coefficient is split into a masked low part (55 bits for shift 0x37, 54 bits for shift 0x36) and a carry that is added to the next coefficient.
+{ uint64_t x46 = ((uint64_t)x44 & 0x7fffffffffffff);
+{ uint128_t x47 = (x45 + x43);
+{ uint128_t x48 = (x47 >> 0x36);
+{ uint64_t x49 = ((uint64_t)x47 & 0x3fffffffffffff);
+{ uint128_t x50 = (x48 + x42);
+{ uint128_t x51 = (x50 >> 0x36);
+{ uint64_t x52 = ((uint64_t)x50 & 0x3fffffffffffff);
+{ uint128_t x53 = (x51 + x41);
+{ uint128_t x54 = (x53 >> 0x37);
+{ uint64_t x55 = ((uint64_t)x53 & 0x7fffffffffffff);
+{ uint128_t x56 = (x54 + x40);
+{ uint128_t x57 = (x56 >> 0x36);
+{ uint64_t x58 = ((uint64_t)x56 & 0x3fffffffffffff);
+{ uint128_t x59 = (x57 + x39);
+{ uint128_t x60 = (x59 >> 0x36);
+{ uint64_t x61 = ((uint64_t)x59 & 0x3fffffffffffff);
+{ uint128_t x62 = (x60 + x38);
+{ uint64_t x63 = (uint64_t) (x62 >> 0x37);
+{ uint64_t x64 = ((uint64_t)x62 & 0x7fffffffffffff);
+{ uint128_t x65 = (x63 + x37);
+{ uint64_t x66 = (uint64_t) (x65 >> 0x36);
+{ uint64_t x67 = ((uint64_t)x65 & 0x3fffffffffffff);
+{ uint128_t x68 = (x66 + x36);
+{ uint64_t x69 = (uint64_t) (x68 >> 0x36);
+{ uint64_t x70 = ((uint64_t)x68 & 0x3fffffffffffff);
+{ uint128_t x71 = (x46 + ((uint128_t)0x15 * x69)); // The carry out of the top coefficient (x69) is multiplied by 0x15 and added back into limb 0 (x46).
+{ uint64_t x72 = (uint64_t) (x71 >> 0x37);
+{ uint64_t x73 = ((uint64_t)x71 & 0x7fffffffffffff);
+{ uint64_t x74 = (x72 + x49);
+{ uint64_t x75 = (x74 >> 0x36);
+{ uint64_t x76 = (x74 & 0x3fffffffffffff);
+out[0] = x70; // Limbs are stored from the top limb (out[0]) down to limb 0 (out[8]).
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52; // The last carry x75 is added to limb 2 without a further mask.
+out[7] = x76;
+out[8] = x73;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas64_2e489m21/femul.h b/src/Specific/solinas64_2e489m21/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas64_2e489m21/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21); // Prototype of femul (defined in femul.c of this directory): x18..x5 and x34..x21 are the limbs of the two operands; out must have room for 9 uint64_t values (cf. the "caller: uint64_t out[9]" note in femul.c).
diff --git a/src/Specific/solinas64_2e489m21/fesquare.c b/src/Specific/solinas64_2e489m21/fesquare.c
new file mode 100644
index 000000000..841b6c705
--- /dev/null
+++ b/src/Specific/solinas64_2e489m21/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fesquare: squares the 9-limb value (x2 = limb 0, x4, x6, x8, x10, x12, x14, x16, x15 = limb 8) and stores 9 result limbs in out, with out[8] holding limb 0 and out[0] holding limb 8. Presumably squaring modulo 2^489 - 21 (directory name and the 0x15 factor) -- confirm against the generator.
+{ uint128_t x17 = (((uint128_t)x2 * x15) + ((0x2 * ((uint128_t)x4 * x16)) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + ((0x2 * ((uint128_t)x10 * x10)) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x15 * x2))))))))); // x17 .. x25 are the 128-bit product coefficients for limb positions 8 down to 0; from x18 on, the products whose limb indices sum past 8 are scaled by 0x15. Symmetric products are written out twice rather than merged.
+{ uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x15 * ((uint128_t)x15 * x15)));
+{ uint128_t x19 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + (((uint128_t)x8 * x8) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x15 * ((0x2 * ((uint128_t)x16 * x15)) + (0x2 * ((uint128_t)x15 * x16)))));
+{ uint128_t x20 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x15 * (((uint128_t)x14 * x15) + ((0x2 * ((uint128_t)x16 * x16)) + ((uint128_t)x15 * x14)))));
+{ uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x15 * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
+{ uint128_t x22 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x15 * ((0x2 * ((uint128_t)x10 * x15)) + ((0x2 * ((uint128_t)x12 * x16)) + (((uint128_t)x14 * x14) + ((0x2 * ((uint128_t)x16 * x12)) + (0x2 * ((uint128_t)x15 * x10))))))));
+{ uint128_t x23 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x15 * (((uint128_t)x8 * x15) + ((0x2 * ((uint128_t)x10 * x16)) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + ((0x2 * ((uint128_t)x16 * x10)) + ((uint128_t)x15 * x8))))))));
+{ uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x15 * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
+{ uint128_t x25 = (((uint128_t)x2 * x2) + (0x15 * ((0x2 * ((uint128_t)x4 * x15)) + ((0x2 * ((uint128_t)x6 * x16)) + (((uint128_t)x8 * x14) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + (((uint128_t)x14 * x8) + ((0x2 * ((uint128_t)x16 * x6)) + (0x2 * ((uint128_t)x15 * x4)))))))))));
+{ uint128_t x26 = (x25 >> 0x37); // Carry chain, starting at limb 0 (x25): each coefficient is split into a masked low part (55 bits for shift 0x37, 54 bits for shift 0x36) and a carry that is added to the next coefficient.
+{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
+{ uint128_t x28 = (x26 + x24);
+{ uint128_t x29 = (x28 >> 0x36);
+{ uint64_t x30 = ((uint64_t)x28 & 0x3fffffffffffff);
+{ uint128_t x31 = (x29 + x23);
+{ uint128_t x32 = (x31 >> 0x36);
+{ uint64_t x33 = ((uint64_t)x31 & 0x3fffffffffffff);
+{ uint128_t x34 = (x32 + x22);
+{ uint128_t x35 = (x34 >> 0x37);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);
+{ uint128_t x37 = (x35 + x21);
+{ uint128_t x38 = (x37 >> 0x36);
+{ uint64_t x39 = ((uint64_t)x37 & 0x3fffffffffffff);
+{ uint128_t x40 = (x38 + x20);
+{ uint128_t x41 = (x40 >> 0x36);
+{ uint64_t x42 = ((uint64_t)x40 & 0x3fffffffffffff);
+{ uint128_t x43 = (x41 + x19);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x37);
+{ uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
+{ uint128_t x46 = (x44 + x18);
+{ uint64_t x47 = (uint64_t) (x46 >> 0x36);
+{ uint64_t x48 = ((uint64_t)x46 & 0x3fffffffffffff);
+{ uint128_t x49 = (x47 + x17);
+{ uint64_t x50 = (uint64_t) (x49 >> 0x36);
+{ uint64_t x51 = ((uint64_t)x49 & 0x3fffffffffffff);
+{ uint128_t x52 = (x27 + ((uint128_t)0x15 * x50)); // The carry out of the top coefficient (x50) is multiplied by 0x15 and added back into limb 0 (x27).
+{ uint64_t x53 = (uint64_t) (x52 >> 0x37);
+{ uint64_t x54 = ((uint64_t)x52 & 0x7fffffffffffff);
+{ uint64_t x55 = (x53 + x30);
+{ uint64_t x56 = (x55 >> 0x36);
+{ uint64_t x57 = (x55 & 0x3fffffffffffff);
+out[0] = x51; // Limbs are stored from the top limb (out[0]) down to limb 0 (out[8]).
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33; // The last carry x56 is added to limb 2 without a further mask.
+out[7] = x57;
+out[8] = x54;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas64_2e489m21/fesquare.h b/src/Specific/solinas64_2e489m21/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas64_2e489m21/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fesquare (defined in fesquare.c of this directory): x15..x2 are the limbs of the operand; out must have room for 9 uint64_t values (cf. the "caller: uint64_t out[9]" note in fesquare.c).
diff --git a/src/Specific/solinas64_2e489m21/freeze.c b/src/Specific/solinas64_2e489m21/freeze.c
new file mode 100644
index 000000000..61dceee8a
--- /dev/null
+++ b/src/Specific/solinas64_2e489m21/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace after the signature and the statements contain untranslated generator syntax. It looks like the display-log-to-C conversion failed for this file; regenerate before trying to compile.
+out[0] = uint64_t x18; // NOTE(review): a declaration used as an expression; x18 is never defined.
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 55 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): raw "Op Syntax.SubWithGetBorrow 55 ..." text, presumably a 55-bit subtract-with-borrow that was not lowered to C.
+out[2] = x2;
+out[3] = 0x7fffffffffffeb;; // 0x7fffffffffffeb equals 2^55 - 21; presumably the low limb of the modulus -- TODO confirm. The doubled semicolon is in the generated text.
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e489m21/freeze.h b/src/Specific/solinas64_2e489m21/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas64_2e489m21/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of freeze: out pointer plus nine uint64_t limbs. NOTE(review): the freeze.c body added for this directory in the same commit is not valid C, so the required size of out cannot be confirmed from it.
diff --git a/src/Specific/solinas64_2e495m31/femul.c b/src/Specific/solinas64_2e495m31/femul.c
new file mode 100644
index 000000000..b950c4c96
--- /dev/null
+++ b/src/Specific/solinas64_2e495m31/femul.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21) // femul: multiplies the 9-limb value (x5 = limb 0, x7, x9, x11, x13, x15, x17, x19, x18 = limb 8) by (x21 = limb 0, x23, ..., x35, x34 = limb 8) and stores 9 result limbs in out, with out[8] holding limb 0 and out[0] holding limb 8. Presumably multiplication modulo 2^495 - 31 (directory name and the 0x1f factor) -- confirm against the generator.
+{ uint128_t x36 = (((uint128_t)x5 * x34) + (((uint128_t)x7 * x35) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + (((uint128_t)x17 * x25) + (((uint128_t)x19 * x23) + ((uint128_t)x18 * x21))))))))); // x36 .. x44 are the 128-bit product coefficients for limb positions 8 down to 0; from x37 on, the products whose limb indices sum past 8 are scaled by 0x1f.
+{ uint128_t x37 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + (((uint128_t)x17 * x23) + ((uint128_t)x19 * x21)))))))) + (0x1f * ((uint128_t)x18 * x34)));
+{ uint128_t x38 = ((((uint128_t)x5 * x33) + (((uint128_t)x7 * x31) + (((uint128_t)x9 * x29) + (((uint128_t)x11 * x27) + (((uint128_t)x13 * x25) + (((uint128_t)x15 * x23) + ((uint128_t)x17 * x21))))))) + (0x1f * (((uint128_t)x19 * x34) + ((uint128_t)x18 * x35))));
+{ uint128_t x39 = ((((uint128_t)x5 * x31) + (((uint128_t)x7 * x29) + (((uint128_t)x9 * x27) + (((uint128_t)x11 * x25) + (((uint128_t)x13 * x23) + ((uint128_t)x15 * x21)))))) + (0x1f * (((uint128_t)x17 * x34) + (((uint128_t)x19 * x35) + ((uint128_t)x18 * x33)))));
+{ uint128_t x40 = ((((uint128_t)x5 * x29) + (((uint128_t)x7 * x27) + (((uint128_t)x9 * x25) + (((uint128_t)x11 * x23) + ((uint128_t)x13 * x21))))) + (0x1f * (((uint128_t)x15 * x34) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + ((uint128_t)x18 * x31))))));
+{ uint128_t x41 = ((((uint128_t)x5 * x27) + (((uint128_t)x7 * x25) + (((uint128_t)x9 * x23) + ((uint128_t)x11 * x21)))) + (0x1f * (((uint128_t)x13 * x34) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + ((uint128_t)x18 * x29)))))));
+{ uint128_t x42 = ((((uint128_t)x5 * x25) + (((uint128_t)x7 * x23) + ((uint128_t)x9 * x21))) + (0x1f * (((uint128_t)x11 * x34) + (((uint128_t)x13 * x35) + (((uint128_t)x15 * x33) + (((uint128_t)x17 * x31) + (((uint128_t)x19 * x29) + ((uint128_t)x18 * x27))))))));
+{ uint128_t x43 = ((((uint128_t)x5 * x23) + ((uint128_t)x7 * x21)) + (0x1f * (((uint128_t)x9 * x34) + (((uint128_t)x11 * x35) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + (((uint128_t)x17 * x29) + (((uint128_t)x19 * x27) + ((uint128_t)x18 * x25)))))))));
+{ uint128_t x44 = (((uint128_t)x5 * x21) + (0x1f * (((uint128_t)x7 * x34) + (((uint128_t)x9 * x35) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + (((uint128_t)x17 * x27) + (((uint128_t)x19 * x25) + ((uint128_t)x18 * x23))))))))));
+{ uint128_t x45 = (x44 >> 0x37); // Carry chain, starting at limb 0 (x44): each coefficient is split into its low 55 bits (mask 0x7fffffffffffff) and a carry (shift by 0x37) that is added to the next coefficient.
+{ uint64_t x46 = ((uint64_t)x44 & 0x7fffffffffffff);
+{ uint128_t x47 = (x45 + x43);
+{ uint128_t x48 = (x47 >> 0x37);
+{ uint64_t x49 = ((uint64_t)x47 & 0x7fffffffffffff);
+{ uint128_t x50 = (x48 + x42);
+{ uint128_t x51 = (x50 >> 0x37);
+{ uint64_t x52 = ((uint64_t)x50 & 0x7fffffffffffff);
+{ uint128_t x53 = (x51 + x41);
+{ uint128_t x54 = (x53 >> 0x37);
+{ uint64_t x55 = ((uint64_t)x53 & 0x7fffffffffffff);
+{ uint128_t x56 = (x54 + x40);
+{ uint128_t x57 = (x56 >> 0x37);
+{ uint64_t x58 = ((uint64_t)x56 & 0x7fffffffffffff);
+{ uint128_t x59 = (x57 + x39);
+{ uint128_t x60 = (x59 >> 0x37);
+{ uint64_t x61 = ((uint64_t)x59 & 0x7fffffffffffff);
+{ uint128_t x62 = (x60 + x38);
+{ uint128_t x63 = (x62 >> 0x37);
+{ uint64_t x64 = ((uint64_t)x62 & 0x7fffffffffffff);
+{ uint128_t x65 = (x63 + x37);
+{ uint64_t x66 = (uint64_t) (x65 >> 0x37);
+{ uint64_t x67 = ((uint64_t)x65 & 0x7fffffffffffff);
+{ uint128_t x68 = (x66 + x36);
+{ uint64_t x69 = (uint64_t) (x68 >> 0x37);
+{ uint64_t x70 = ((uint64_t)x68 & 0x7fffffffffffff);
+{ uint128_t x71 = (x46 + ((uint128_t)0x1f * x69)); // The carry out of the top coefficient (x69) is multiplied by 0x1f and added back into limb 0 (x46).
+{ uint64_t x72 = (uint64_t) (x71 >> 0x37);
+{ uint64_t x73 = ((uint64_t)x71 & 0x7fffffffffffff);
+{ uint64_t x74 = (x72 + x49);
+{ uint64_t x75 = (x74 >> 0x37);
+{ uint64_t x76 = (x74 & 0x7fffffffffffff);
+out[0] = x70; // Limbs are stored from the top limb (out[0]) down to limb 0 (out[8]).
+out[1] = x67;
+out[2] = x64;
+out[3] = x61;
+out[4] = x58;
+out[5] = x55;
+out[6] = x75 + x52; // The last carry x75 is added to limb 2 without a further mask.
+out[7] = x76;
+out[8] = x73;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas64_2e495m31/femul.h b/src/Specific/solinas64_2e495m31/femul.h
new file mode 100644
index 000000000..031d77ff9
--- /dev/null
+++ b/src/Specific/solinas64_2e495m31/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x18, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x34, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23, uint64_t x21); // Prototype of femul (defined in femul.c of this directory): x18..x5 and x34..x21 are the limbs of the two operands; out must have room for 9 uint64_t values (cf. the "caller: uint64_t out[9]" note in femul.c).
diff --git a/src/Specific/solinas64_2e495m31/fesquare.c b/src/Specific/solinas64_2e495m31/fesquare.c
new file mode 100644
index 000000000..dcf85abaa
--- /dev/null
+++ b/src/Specific/solinas64_2e495m31/fesquare.c
@@ -0,0 +1,71 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // fesquare: squares the 9-limb value (x2 = limb 0, x4, x6, x8, x10, x12, x14, x16, x15 = limb 8) and stores 9 result limbs in out, with out[8] holding limb 0 and out[0] holding limb 8. Presumably squaring modulo 2^495 - 31 (directory name and the 0x1f factor) -- confirm against the generator.
+{ uint128_t x17 = (((uint128_t)x2 * x15) + (((uint128_t)x4 * x16) + (((uint128_t)x6 * x14) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + (((uint128_t)x14 * x6) + (((uint128_t)x16 * x4) + ((uint128_t)x15 * x2))))))))); // x17 .. x25 are the 128-bit product coefficients for limb positions 8 down to 0; from x18 on, the products whose limb indices sum past 8 are scaled by 0x1f. Symmetric products are written out twice rather than merged.
+{ uint128_t x18 = ((((uint128_t)x2 * x16) + (((uint128_t)x4 * x14) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + (((uint128_t)x14 * x4) + ((uint128_t)x16 * x2)))))))) + (0x1f * ((uint128_t)x15 * x15)));
+{ uint128_t x19 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x1f * (((uint128_t)x16 * x15) + ((uint128_t)x15 * x16))));
+{ uint128_t x20 = ((((uint128_t)x2 * x12) + (((uint128_t)x4 * x10) + (((uint128_t)x6 * x8) + (((uint128_t)x8 * x6) + (((uint128_t)x10 * x4) + ((uint128_t)x12 * x2)))))) + (0x1f * (((uint128_t)x14 * x15) + (((uint128_t)x16 * x16) + ((uint128_t)x15 * x14)))));
+{ uint128_t x21 = ((((uint128_t)x2 * x10) + (((uint128_t)x4 * x8) + (((uint128_t)x6 * x6) + (((uint128_t)x8 * x4) + ((uint128_t)x10 * x2))))) + (0x1f * (((uint128_t)x12 * x15) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + ((uint128_t)x15 * x12))))));
+{ uint128_t x22 = ((((uint128_t)x2 * x8) + (((uint128_t)x4 * x6) + (((uint128_t)x6 * x4) + ((uint128_t)x8 * x2)))) + (0x1f * (((uint128_t)x10 * x15) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + ((uint128_t)x15 * x10)))))));
+{ uint128_t x23 = ((((uint128_t)x2 * x6) + (((uint128_t)x4 * x4) + ((uint128_t)x6 * x2))) + (0x1f * (((uint128_t)x8 * x15) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + ((uint128_t)x15 * x8))))))));
+{ uint128_t x24 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1f * (((uint128_t)x6 * x15) + (((uint128_t)x8 * x16) + (((uint128_t)x10 * x14) + (((uint128_t)x12 * x12) + (((uint128_t)x14 * x10) + (((uint128_t)x16 * x8) + ((uint128_t)x15 * x6)))))))));
+{ uint128_t x25 = (((uint128_t)x2 * x2) + (0x1f * (((uint128_t)x4 * x15) + (((uint128_t)x6 * x16) + (((uint128_t)x8 * x14) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + (((uint128_t)x14 * x8) + (((uint128_t)x16 * x6) + ((uint128_t)x15 * x4))))))))));
+{ uint128_t x26 = (x25 >> 0x37); // Carry chain, starting at limb 0 (x25): each coefficient is split into its low 55 bits (mask 0x7fffffffffffff) and a carry (shift by 0x37) that is added to the next coefficient.
+{ uint64_t x27 = ((uint64_t)x25 & 0x7fffffffffffff);
+{ uint128_t x28 = (x26 + x24);
+{ uint128_t x29 = (x28 >> 0x37);
+{ uint64_t x30 = ((uint64_t)x28 & 0x7fffffffffffff);
+{ uint128_t x31 = (x29 + x23);
+{ uint128_t x32 = (x31 >> 0x37);
+{ uint64_t x33 = ((uint64_t)x31 & 0x7fffffffffffff);
+{ uint128_t x34 = (x32 + x22);
+{ uint128_t x35 = (x34 >> 0x37);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7fffffffffffff);
+{ uint128_t x37 = (x35 + x21);
+{ uint128_t x38 = (x37 >> 0x37);
+{ uint64_t x39 = ((uint64_t)x37 & 0x7fffffffffffff);
+{ uint128_t x40 = (x38 + x20);
+{ uint128_t x41 = (x40 >> 0x37);
+{ uint64_t x42 = ((uint64_t)x40 & 0x7fffffffffffff);
+{ uint128_t x43 = (x41 + x19);
+{ uint128_t x44 = (x43 >> 0x37);
+{ uint64_t x45 = ((uint64_t)x43 & 0x7fffffffffffff);
+{ uint128_t x46 = (x44 + x18);
+{ uint64_t x47 = (uint64_t) (x46 >> 0x37);
+{ uint64_t x48 = ((uint64_t)x46 & 0x7fffffffffffff);
+{ uint128_t x49 = (x47 + x17);
+{ uint64_t x50 = (uint64_t) (x49 >> 0x37);
+{ uint64_t x51 = ((uint64_t)x49 & 0x7fffffffffffff);
+{ uint128_t x52 = (x27 + ((uint128_t)0x1f * x50)); // The carry out of the top coefficient (x50) is multiplied by 0x1f and added back into limb 0 (x27).
+{ uint64_t x53 = (uint64_t) (x52 >> 0x37);
+{ uint64_t x54 = ((uint64_t)x52 & 0x7fffffffffffff);
+{ uint64_t x55 = (x53 + x30);
+{ uint64_t x56 = (x55 >> 0x37);
+{ uint64_t x57 = (x55 & 0x7fffffffffffff);
+out[0] = x51; // Limbs are stored from the top limb (out[0]) down to limb 0 (out[8]).
+out[1] = x48;
+out[2] = x45;
+out[3] = x42;
+out[4] = x39;
+out[5] = x36;
+out[6] = x56 + x33; // The last carry x56 is added to limb 2 without a further mask.
+out[7] = x57;
+out[8] = x54;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[9];
diff --git a/src/Specific/solinas64_2e495m31/fesquare.h b/src/Specific/solinas64_2e495m31/fesquare.h
new file mode 100644
index 000000000..ea76fd13b
--- /dev/null
+++ b/src/Specific/solinas64_2e495m31/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of fesquare (defined in fesquare.c of this directory): x15..x2 are the limbs of the operand; out must have room for 9 uint64_t values (cf. the "caller: uint64_t out[9]" note in fesquare.c).
diff --git a/src/Specific/solinas64_2e495m31/freeze.c b/src/Specific/solinas64_2e495m31/freeze.c
new file mode 100644
index 000000000..2825b688d
--- /dev/null
+++ b/src/Specific/solinas64_2e495m31/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C -- there is no opening brace after the signature and the statements contain untranslated generator syntax. It looks like the display-log-to-C conversion failed for this file; regenerate before trying to compile.
+out[0] = uint64_t x18; // NOTE(review): a declaration used as an expression; x18 is never defined.
+out[1] = uint8_t x19 = Op Syntax.SubWithGetBorrow 55 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): raw "Op Syntax.SubWithGetBorrow 55 ..." text, presumably a 55-bit subtract-with-borrow that was not lowered to C.
+out[2] = x2;
+out[3] = 0x7fffffffffffe1;; // 0x7fffffffffffe1 equals 2^55 - 31; presumably the low limb of the modulus -- TODO confirm. The doubled semicolon is in the generated text.
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e495m31/freeze.h b/src/Specific/solinas64_2e495m31/freeze.h
new file mode 100644
index 000000000..9e0ff6410
--- /dev/null
+++ b/src/Specific/solinas64_2e495m31/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x15, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of freeze: out pointer plus nine uint64_t limbs. NOTE(review): the freeze.c body added for this directory in the same commit is not valid C, so the required size of out cannot be confirmed from it.
diff --git a/src/Specific/solinas64_2e510m290x2e496m1/freeze.c b/src/Specific/solinas64_2e510m290x2e496m1/freeze.c
new file mode 100644
index 000000000..d36b70a17
--- /dev/null
+++ b/src/Specific/solinas64_2e510m290x2e496m1/freeze.c
@@ -0,0 +1,62 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // freeze: runs a borrow chain subtracting a fixed 10-limb constant from the limbs x2, x4, ..., x16, x18, x17 (in that order), then runs a carry chain adding the same constant back, masked by a value selected from the final borrow. out[9] receives the limb derived from x2 and out[0] the one derived from x17. The constant's limbs (nine times 0x7ffffffffffff, then 0x7dbbfffffffff) equal the 51-bit limbs of 2^510 - 290*2^496 - 1, matching the directory name.
+{ uint64_t x20; uint8_t x21 = _subborrow_u51(0x0, x2, 0x7ffffffffffff, &x20); // _subborrow_u51 / _addcarryx_u51 are not defined in this file; presumably 51-bit borrow/carry helpers supplied elsewhere (liblow.h?) -- TODO confirm.
+{ uint64_t x23; uint8_t x24 = _subborrow_u51(x21, x4, 0x7ffffffffffff, &x23);
+{ uint64_t x26; uint8_t x27 = _subborrow_u51(x24, x6, 0x7ffffffffffff, &x26);
+{ uint64_t x29; uint8_t x30 = _subborrow_u51(x27, x8, 0x7ffffffffffff, &x29);
+{ uint64_t x32; uint8_t x33 = _subborrow_u51(x30, x10, 0x7ffffffffffff, &x32);
+{ uint64_t x35; uint8_t x36 = _subborrow_u51(x33, x12, 0x7ffffffffffff, &x35);
+{ uint64_t x38; uint8_t x39 = _subborrow_u51(x36, x14, 0x7ffffffffffff, &x38);
+{ uint64_t x41; uint8_t x42 = _subborrow_u51(x39, x16, 0x7ffffffffffff, &x41);
+{ uint64_t x44; uint8_t x45 = _subborrow_u51(x42, x18, 0x7ffffffffffff, &x44);
+{ uint64_t x47; uint8_t x48 = _subborrow_u51(x45, x17, 0x7dbbfffffffff, &x47);
+{ uint64_t x49 = (uint64_t)cmovznz(x48, 0x0, 0xffffffffffffffffL); // x49 is chosen between 0x0 and all-ones from the final borrow x48; cmovznz is not defined in this file, so which value goes with which borrow state depends on its definition (presumably all-ones when a borrow occurred -- TODO confirm).
+{ uint64_t x50 = (x49 & 0x7ffffffffffff); // Each add-back operand is the constant limb ANDed with x49.
+{ uint64_t x52; uint8_t x53 = _addcarryx_u51(0x0, x20, x50, &x52);
+{ uint64_t x54 = (x49 & 0x7ffffffffffff);
+{ uint64_t x56; uint8_t x57 = _addcarryx_u51(x53, x23, x54, &x56);
+{ uint64_t x58 = (x49 & 0x7ffffffffffff);
+{ uint64_t x60; uint8_t x61 = _addcarryx_u51(x57, x26, x58, &x60);
+{ uint64_t x62 = (x49 & 0x7ffffffffffff);
+{ uint64_t x64; uint8_t x65 = _addcarryx_u51(x61, x29, x62, &x64);
+{ uint64_t x66 = (x49 & 0x7ffffffffffff);
+{ uint64_t x68; uint8_t x69 = _addcarryx_u51(x65, x32, x66, &x68);
+{ uint64_t x70 = (x49 & 0x7ffffffffffff);
+{ uint64_t x72; uint8_t x73 = _addcarryx_u51(x69, x35, x70, &x72);
+{ uint64_t x74 = (x49 & 0x7ffffffffffff);
+{ uint64_t x76; uint8_t x77 = _addcarryx_u51(x73, x38, x74, &x76);
+{ uint64_t x78 = (x49 & 0x7ffffffffffff);
+{ uint64_t x80; uint8_t x81 = _addcarryx_u51(x77, x41, x78, &x80);
+{ uint64_t x82 = (x49 & 0x7ffffffffffff);
+{ uint64_t x84; uint8_t x85 = _addcarryx_u51(x81, x44, x82, &x84);
+{ uint64_t x86 = (x49 & 0x7dbbfffffffff);
+{ uint64_t x88; uint8_t _ = _addcarryx_u51(x85, x47, x86, &x88); // The final carry is bound to `_` and never read.
+out[0] = x88; // Limbs are stored in reverse order of the chain: out[0] is the last limb processed (from x17), out[9] the first (from x2).
+out[1] = x84;
+out[2] = x80;
+out[3] = x76;
+out[4] = x72;
+out[5] = x68;
+out[6] = x64;
+out[7] = x60;
+out[8] = x56;
+out[9] = x52;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e510m290x2e496m1/freeze.h b/src/Specific/solinas64_2e510m290x2e496m1/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas64_2e510m290x2e496m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype of freeze (defined in freeze.c of this directory): x17..x2 are the ten input limbs; out must have room for 10 uint64_t values (cf. the "caller: uint64_t out[10]" note in freeze.c).
diff --git a/src/Specific/solinas64_2e511m187/femul.c b/src/Specific/solinas64_2e511m187/femul.c
new file mode 100644
index 000000000..3aad50af3
--- /dev/null
+++ b/src/Specific/solinas64_2e511m187/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
+{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
+{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0xbb * ((uint128_t)x20 * x38)));
+{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0xbb * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
+{ uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (0xbb * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
+{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0xbb * (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35))))));
+{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0xbb * (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33)))))));
+{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0xbb * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
+{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0xbb * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
+{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0xbb * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
+{ uint128_t x49 = (((uint128_t)x5 * x23) + (0xbb * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
+{ uint128_t x50 = (x49 >> 0x34);
+{ uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
+{ uint128_t x52 = (x50 + x48);
+{ uint128_t x53 = (x52 >> 0x33);
+{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+{ uint128_t x55 = (x53 + x47);
+{ uint128_t x56 = (x55 >> 0x33);
+{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+{ uint128_t x58 = (x56 + x46);
+{ uint128_t x59 = (x58 >> 0x33);
+{ uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
+{ uint128_t x61 = (x59 + x45);
+{ uint128_t x62 = (x61 >> 0x33);
+{ uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
+{ uint128_t x64 = (x62 + x44);
+{ uint128_t x65 = (x64 >> 0x33);
+{ uint64_t x66 = ((uint64_t)x64 & 0x7ffffffffffff);
+{ uint128_t x67 = (x65 + x43);
+{ uint64_t x68 = (uint64_t) (x67 >> 0x33);
+{ uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
+{ uint128_t x70 = (x68 + x42);
+{ uint64_t x71 = (uint64_t) (x70 >> 0x33);
+{ uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
+{ uint128_t x73 = (x71 + x41);
+{ uint64_t x74 = (uint64_t) (x73 >> 0x33);
+{ uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
+{ uint128_t x76 = (x74 + x40);
+{ uint64_t x77 = (uint64_t) (x76 >> 0x33);
+{ uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
+{ uint128_t x79 = (x51 + ((uint128_t)0xbb * x77));
+{ uint64_t x80 = (uint64_t) (x79 >> 0x34);
+{ uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
+{ uint64_t x82 = (x80 + x54);
+{ uint64_t x83 = (x82 >> 0x33);
+{ uint64_t x84 = (x82 & 0x7ffffffffffff);
+out[0] = x78;
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57;
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e511m187/femul.h b/src/Specific/solinas64_2e511m187/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas64_2e511m187/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23);
diff --git a/src/Specific/solinas64_2e511m187/fesquare.c b/src/Specific/solinas64_2e511m187/fesquare.c
new file mode 100644
index 000000000..f76c2ef67
--- /dev/null
+++ b/src/Specific/solinas64_2e511m187/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
+{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0xbb * ((uint128_t)x17 * x17)));
+{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0xbb * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
+{ uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0xbb * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
+{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0xbb * (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14))))));
+{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0xbb * (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12)))))));
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0xbb * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
+{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0xbb * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
+{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0xbb * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
+{ uint128_t x28 = (((uint128_t)x2 * x2) + (0xbb * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
+{ uint128_t x29 = (x28 >> 0x34);
+{ uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
+{ uint128_t x31 = (x29 + x27);
+{ uint128_t x32 = (x31 >> 0x33);
+{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
+{ uint128_t x34 = (x32 + x26);
+{ uint128_t x35 = (x34 >> 0x33);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+{ uint128_t x37 = (x35 + x25);
+{ uint128_t x38 = (x37 >> 0x33);
+{ uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
+{ uint128_t x40 = (x38 + x24);
+{ uint128_t x41 = (x40 >> 0x33);
+{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
+{ uint128_t x43 = (x41 + x23);
+{ uint128_t x44 = (x43 >> 0x33);
+{ uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffff);
+{ uint128_t x46 = (x44 + x22);
+{ uint64_t x47 = (uint64_t) (x46 >> 0x33);
+{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
+{ uint128_t x49 = (x47 + x21);
+{ uint64_t x50 = (uint64_t) (x49 >> 0x33);
+{ uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
+{ uint128_t x52 = (x50 + x20);
+{ uint64_t x53 = (uint64_t) (x52 >> 0x33);
+{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+{ uint128_t x55 = (x53 + x19);
+{ uint64_t x56 = (uint64_t) (x55 >> 0x33);
+{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+{ uint128_t x58 = (x30 + ((uint128_t)0xbb * x56));
+{ uint64_t x59 = (uint64_t) (x58 >> 0x34);
+{ uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
+{ uint64_t x61 = (x59 + x33);
+{ uint64_t x62 = (x61 >> 0x33);
+{ uint64_t x63 = (x61 & 0x7ffffffffffff);
+out[0] = x57;
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36;
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e511m187/fesquare.h b/src/Specific/solinas64_2e511m187/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas64_2e511m187/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e511m187/freeze.c b/src/Specific/solinas64_2e511m187/freeze.c
new file mode 100644
index 000000000..20e1a8cee
--- /dev/null
+++ b/src/Specific/solinas64_2e511m187/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint64_t x20;
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xfffffffffff45;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e511m187/freeze.h b/src/Specific/solinas64_2e511m187/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas64_2e511m187/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e511m481/femul.c b/src/Specific/solinas64_2e511m481/femul.c
new file mode 100644
index 000000000..7cc6675b3
--- /dev/null
+++ b/src/Specific/solinas64_2e511m481/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23)
+{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23))))))))));
+{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x1e1 * ((uint128_t)x20 * x38)));
+{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x1e1 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
+{ uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (0x1e1 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
+{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x1e1 * (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35))))));
+{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x1e1 * (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33)))))));
+{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x1e1 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
+{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x1e1 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
+{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x1e1 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
+{ uint128_t x49 = (((uint128_t)x5 * x23) + (0x1e1 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))));
+{ uint128_t x50 = (x49 >> 0x34);
+{ uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
+{ uint128_t x52 = (x50 + x48);
+{ uint128_t x53 = (x52 >> 0x33);
+{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+{ uint128_t x55 = (x53 + x47);
+{ uint128_t x56 = (x55 >> 0x33);
+{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+{ uint128_t x58 = (x56 + x46);
+{ uint128_t x59 = (x58 >> 0x33);
+{ uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
+{ uint128_t x61 = (x59 + x45);
+{ uint128_t x62 = (x61 >> 0x33);
+{ uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
+{ uint128_t x64 = (x62 + x44);
+{ uint128_t x65 = (x64 >> 0x33);
+{ uint64_t x66 = ((uint64_t)x64 & 0x7ffffffffffff);
+{ uint128_t x67 = (x65 + x43);
+{ uint128_t x68 = (x67 >> 0x33);
+{ uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
+{ uint128_t x70 = (x68 + x42);
+{ uint128_t x71 = (x70 >> 0x33);
+{ uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
+{ uint128_t x73 = (x71 + x41);
+{ uint64_t x74 = (uint64_t) (x73 >> 0x33);
+{ uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
+{ uint128_t x76 = (x74 + x40);
+{ uint64_t x77 = (uint64_t) (x76 >> 0x33);
+{ uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
+{ uint128_t x79 = (x51 + ((uint128_t)0x1e1 * x77));
+{ uint64_t x80 = (uint64_t) (x79 >> 0x34);
+{ uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
+{ uint64_t x82 = (x80 + x54);
+{ uint64_t x83 = (x82 >> 0x33);
+{ uint64_t x84 = (x82 & 0x7ffffffffffff);
+out[0] = x78;
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57;
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e511m481/femul.h b/src/Specific/solinas64_2e511m481/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas64_2e511m481/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23);
diff --git a/src/Specific/solinas64_2e511m481/fesquare.c b/src/Specific/solinas64_2e511m481/fesquare.c
new file mode 100644
index 000000000..119be6910
--- /dev/null
+++ b/src/Specific/solinas64_2e511m481/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2))))))))));
+{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x1e1 * ((uint128_t)x17 * x17)));
+{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x1e1 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
+{ uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (0x1e1 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
+{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x1e1 * (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14))))));
+{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x1e1 * (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12)))))));
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x1e1 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
+{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x1e1 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
+{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x1e1 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
+{ uint128_t x28 = (((uint128_t)x2 * x2) + (0x1e1 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))));
+{ uint128_t x29 = (x28 >> 0x34);
+{ uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
+{ uint128_t x31 = (x29 + x27);
+{ uint128_t x32 = (x31 >> 0x33);
+{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
+{ uint128_t x34 = (x32 + x26);
+{ uint128_t x35 = (x34 >> 0x33);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+{ uint128_t x37 = (x35 + x25);
+{ uint128_t x38 = (x37 >> 0x33);
+{ uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
+{ uint128_t x40 = (x38 + x24);
+{ uint128_t x41 = (x40 >> 0x33);
+{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
+{ uint128_t x43 = (x41 + x23);
+{ uint128_t x44 = (x43 >> 0x33);
+{ uint64_t x45 = ((uint64_t)x43 & 0x7ffffffffffff);
+{ uint128_t x46 = (x44 + x22);
+{ uint128_t x47 = (x46 >> 0x33);
+{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
+{ uint128_t x49 = (x47 + x21);
+{ uint128_t x50 = (x49 >> 0x33);
+{ uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
+{ uint128_t x52 = (x50 + x20);
+{ uint64_t x53 = (uint64_t) (x52 >> 0x33);
+{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+{ uint128_t x55 = (x53 + x19);
+{ uint64_t x56 = (uint64_t) (x55 >> 0x33);
+{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+{ uint128_t x58 = (x30 + ((uint128_t)0x1e1 * x56));
+{ uint64_t x59 = (uint64_t) (x58 >> 0x34);
+{ uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
+{ uint64_t x61 = (x59 + x33);
+{ uint64_t x62 = (x61 >> 0x33);
+{ uint64_t x63 = (x61 & 0x7ffffffffffff);
+out[0] = x57;
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36;
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e511m481/fesquare.h b/src/Specific/solinas64_2e511m481/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas64_2e511m481/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e511m481/freeze.c b/src/Specific/solinas64_2e511m481/freeze.c
new file mode 100644
index 000000000..0972f7c11
--- /dev/null
+++ b/src/Specific/solinas64_2e511m481/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint64_t x20;
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xffffffffffe1f;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e511m481/freeze.h b/src/Specific/solinas64_2e511m481/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas64_2e511m481/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e512m491x2e496m1/freeze.c b/src/Specific/solinas64_2e512m491x2e496m1/freeze.c
new file mode 100644
index 000000000..9f6318ae5
--- /dev/null
+++ b/src/Specific/solinas64_2e512m491x2e496m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2)
+out[0] = uint64_t x20;
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0;
+out[2] = x2;
+out[3] = 0xfffffffffffff;;
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e512m491x2e496m1/freeze.h b/src/Specific/solinas64_2e512m491x2e496m1/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas64_2e512m491x2e496m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2);
diff --git a/src/Specific/solinas64_2e512m569/femul.c b/src/Specific/solinas64_2e512m569/femul.c
new file mode 100644
index 000000000..e4b8904f7
--- /dev/null
+++ b/src/Specific/solinas64_2e512m569/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23) // Multiplies two 10-limb operands (first: x20,x21,x19,...,x5; second: x38,x39,x37,...,x23) and stores 10 carried limbs in out[0..9]. Presumably arithmetic modulo 2^512 - 569, per the directory name and the 0x239 (= 569) factor -- TODO confirm.
+{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + (((uint128_t)x13 * x33) + (((uint128_t)x15 * x31) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23)))))))))); // x40..x49 are 128-bit sums of limb products; x40 is the only one with no 0x239-scaled terms
+{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + (((uint128_t)x11 * x33) + (((uint128_t)x13 * x31) + (((uint128_t)x15 * x29) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + (0x239 * ((uint128_t)x20 * x38)));
+{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + (((uint128_t)x9 * x33) + (((uint128_t)x11 * x31) + (((uint128_t)x13 * x29) + (((uint128_t)x15 * x27) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (0x239 * (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39))));
+{ uint128_t x43 = ((((uint128_t)x5 * x35) + (((uint128_t)x7 * x33) + (((uint128_t)x9 * x31) + (((uint128_t)x11 * x29) + (((uint128_t)x13 * x27) + (((uint128_t)x15 * x25) + ((uint128_t)x17 * x23))))))) + (0x239 * (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37)))));
+{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (0x239 * ((0x2 * ((uint128_t)x17 * x38)) + ((0x2 * ((uint128_t)x19 * x39)) + ((0x2 * ((uint128_t)x21 * x37)) + (0x2 * ((uint128_t)x20 * x35)))))));
+{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (0x239 * (((uint128_t)x15 * x38) + ((0x2 * ((uint128_t)x17 * x39)) + ((0x2 * ((uint128_t)x19 * x37)) + ((0x2 * ((uint128_t)x21 * x35)) + ((uint128_t)x20 * x33)))))));
+{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (0x239 * (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + ((0x2 * ((uint128_t)x17 * x37)) + ((0x2 * ((uint128_t)x19 * x35)) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31))))))));
+{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (0x239 * (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + ((0x2 * ((uint128_t)x17 * x35)) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29)))))))));
+{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (0x239 * (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27))))))))));
+{ uint128_t x49 = (((uint128_t)x5 * x23) + (0x239 * ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + (((uint128_t)x15 * x33) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25)))))))))))); // x49 holds x5*x23 and is where the carry chain below starts
+{ uint128_t x50 = (x49 >> 0x34); // carry chain: each sum is split into a carry (bits above 52 or 51) and a masked limb; the carry is added to the next sum
+{ uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
+{ uint128_t x52 = (x50 + x48);
+{ uint128_t x53 = (x52 >> 0x33);
+{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+{ uint128_t x55 = (x53 + x47);
+{ uint128_t x56 = (x55 >> 0x33);
+{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+{ uint128_t x58 = (x56 + x46);
+{ uint128_t x59 = (x58 >> 0x33);
+{ uint64_t x60 = ((uint64_t)x58 & 0x7ffffffffffff);
+{ uint128_t x61 = (x59 + x45);
+{ uint128_t x62 = (x61 >> 0x33);
+{ uint64_t x63 = ((uint64_t)x61 & 0x7ffffffffffff);
+{ uint128_t x64 = (x62 + x44);
+{ uint128_t x65 = (x64 >> 0x34);
+{ uint64_t x66 = ((uint64_t)x64 & 0xfffffffffffff);
+{ uint128_t x67 = (x65 + x43);
+{ uint128_t x68 = (x67 >> 0x33);
+{ uint64_t x69 = ((uint64_t)x67 & 0x7ffffffffffff);
+{ uint128_t x70 = (x68 + x42);
+{ uint128_t x71 = (x70 >> 0x33);
+{ uint64_t x72 = ((uint64_t)x70 & 0x7ffffffffffff);
+{ uint128_t x73 = (x71 + x41);
+{ uint64_t x74 = (uint64_t) (x73 >> 0x33); // from here the carries are narrowed to 64 bits; NOTE(review): presumably justified by input bounds not visible here -- TODO confirm
+{ uint64_t x75 = ((uint64_t)x73 & 0x7ffffffffffff);
+{ uint128_t x76 = (x74 + x40);
+{ uint64_t x77 = (uint64_t) (x76 >> 0x33);
+{ uint64_t x78 = ((uint64_t)x76 & 0x7ffffffffffff);
+{ uint128_t x79 = (x51 + ((uint128_t)0x239 * x77)); // the carry out of the last sum is scaled by 0x239 and folded back into the first limb
+{ uint64_t x80 = (uint64_t) (x79 >> 0x34);
+{ uint64_t x81 = ((uint64_t)x79 & 0xfffffffffffff);
+{ uint64_t x82 = (x80 + x54);
+{ uint64_t x83 = (x82 >> 0x33);
+{ uint64_t x84 = (x82 & 0x7ffffffffffff);
+out[0] = x78; // out[] runs from the x40-derived limb (out[0]) to the x49-derived limb (out[9])
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57; // the final carry x83 is added here without a further mask
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e512m569/femul.h b/src/Specific/solinas64_2e512m569/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas64_2e512m569/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23); // Prototype for the femul definition in this directory's femul.c: one output pointer plus two groups of ten uint64_t limbs passed by value. NOTE(review): this header has no include guard.
diff --git a/src/Specific/solinas64_2e512m569/fesquare.c b/src/Specific/solinas64_2e512m569/fesquare.c
new file mode 100644
index 000000000..61f6a5c8c
--- /dev/null
+++ b/src/Specific/solinas64_2e512m569/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 10-limb operand (x17,x18,x16,...,x2) and stores 10 carried limbs in out[0..9]. Presumably arithmetic modulo 2^512 - 569, per the directory name and the 0x239 (= 569) factor -- TODO confirm.
+{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + (((uint128_t)x10 * x12) + (((uint128_t)x12 * x10) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2)))))))))); // x19..x28 are 128-bit sums of limb products; symmetric terms (e.g. x10*x12 and x12*x10) are written out separately rather than merged
+{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + (((uint128_t)x8 * x12) + (((uint128_t)x10 * x10) + (((uint128_t)x12 * x8) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + (0x239 * ((uint128_t)x17 * x17)));
+{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + (((uint128_t)x6 * x12) + (((uint128_t)x8 * x10) + (((uint128_t)x10 * x8) + (((uint128_t)x12 * x6) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (0x239 * (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18))));
+{ uint128_t x22 = ((((uint128_t)x2 * x14) + (((uint128_t)x4 * x12) + (((uint128_t)x6 * x10) + (((uint128_t)x8 * x8) + (((uint128_t)x10 * x6) + (((uint128_t)x12 * x4) + ((uint128_t)x14 * x2))))))) + (0x239 * (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16)))));
+{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (0x239 * ((0x2 * ((uint128_t)x14 * x17)) + ((0x2 * ((uint128_t)x16 * x18)) + ((0x2 * ((uint128_t)x18 * x16)) + (0x2 * ((uint128_t)x17 * x14)))))));
+{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (0x239 * (((uint128_t)x12 * x17) + ((0x2 * ((uint128_t)x14 * x18)) + ((0x2 * ((uint128_t)x16 * x16)) + ((0x2 * ((uint128_t)x18 * x14)) + ((uint128_t)x17 * x12)))))));
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (0x239 * (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + ((0x2 * ((uint128_t)x14 * x16)) + ((0x2 * ((uint128_t)x16 * x14)) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10))))))));
+{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (0x239 * (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + ((0x2 * ((uint128_t)x14 * x14)) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8)))))))));
+{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (0x239 * (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6))))))))));
+{ uint128_t x28 = (((uint128_t)x2 * x2) + (0x239 * ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + (((uint128_t)x12 * x12) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4)))))))))))); // x28 holds x2*x2 and is where the carry chain below starts
+{ uint128_t x29 = (x28 >> 0x34); // carry chain: each sum is split into a carry (bits above 52 or 51) and a masked limb; the carry is added to the next sum
+{ uint64_t x30 = ((uint64_t)x28 & 0xfffffffffffff);
+{ uint128_t x31 = (x29 + x27);
+{ uint128_t x32 = (x31 >> 0x33);
+{ uint64_t x33 = ((uint64_t)x31 & 0x7ffffffffffff);
+{ uint128_t x34 = (x32 + x26);
+{ uint128_t x35 = (x34 >> 0x33);
+{ uint64_t x36 = ((uint64_t)x34 & 0x7ffffffffffff);
+{ uint128_t x37 = (x35 + x25);
+{ uint128_t x38 = (x37 >> 0x33);
+{ uint64_t x39 = ((uint64_t)x37 & 0x7ffffffffffff);
+{ uint128_t x40 = (x38 + x24);
+{ uint128_t x41 = (x40 >> 0x33);
+{ uint64_t x42 = ((uint64_t)x40 & 0x7ffffffffffff);
+{ uint128_t x43 = (x41 + x23);
+{ uint128_t x44 = (x43 >> 0x34);
+{ uint64_t x45 = ((uint64_t)x43 & 0xfffffffffffff);
+{ uint128_t x46 = (x44 + x22);
+{ uint128_t x47 = (x46 >> 0x33);
+{ uint64_t x48 = ((uint64_t)x46 & 0x7ffffffffffff);
+{ uint128_t x49 = (x47 + x21);
+{ uint128_t x50 = (x49 >> 0x33);
+{ uint64_t x51 = ((uint64_t)x49 & 0x7ffffffffffff);
+{ uint128_t x52 = (x50 + x20);
+{ uint64_t x53 = (uint64_t) (x52 >> 0x33); // from here the carries are narrowed to 64 bits; NOTE(review): presumably justified by input bounds not visible here -- TODO confirm
+{ uint64_t x54 = ((uint64_t)x52 & 0x7ffffffffffff);
+{ uint128_t x55 = (x53 + x19);
+{ uint64_t x56 = (uint64_t) (x55 >> 0x33);
+{ uint64_t x57 = ((uint64_t)x55 & 0x7ffffffffffff);
+{ uint128_t x58 = (x30 + ((uint128_t)0x239 * x56)); // the carry out of the last sum is scaled by 0x239 and folded back into the first limb
+{ uint64_t x59 = (uint64_t) (x58 >> 0x34);
+{ uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
+{ uint64_t x61 = (x59 + x33);
+{ uint64_t x62 = (x61 >> 0x33);
+{ uint64_t x63 = (x61 & 0x7ffffffffffff);
+out[0] = x57; // out[] runs from the x19-derived limb (out[0]) to the x28-derived limb (out[9])
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36; // the final carry x62 is added here without a further mask
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e512m569/fesquare.h b/src/Specific/solinas64_2e512m569/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas64_2e512m569/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the fesquare definition in this directory's fesquare.c: one output pointer plus ten uint64_t limbs passed by value. NOTE(review): this header has no include guard.
diff --git a/src/Specific/solinas64_2e512m569/freeze.c b/src/Specific/solinas64_2e512m569/freeze.c
new file mode 100644
index 000000000..ab6b2036f
--- /dev/null
+++ b/src/Specific/solinas64_2e512m569/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace follows this signature); it looks like un-lowered generator output, so regenerate rather than hand-edit -- TODO confirm with the generator.
+out[0] = uint64_t x20; // NOTE(review): a declaration appears in expression position; x20 is never given a value.
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 52 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C; presumably an operation the printer could not render -- TODO confirm.
+out[2] = x2; // only x2 of the ten limb parameters is referenced in this body
+out[3] = 0xffffffffffdc7;; // equals 2^52 - 569 (0x239 = 569); the doubled ';' is in the generated text
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e512m569/freeze.h b/src/Specific/solinas64_2e512m569/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas64_2e512m569/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the freeze definition in this directory's freeze.c: one output pointer plus ten uint64_t limbs passed by value. NOTE(review): this header has no include guard.
diff --git a/src/Specific/solinas64_2e521m1/femul.c b/src/Specific/solinas64_2e521m1/femul.c
new file mode 100644
index 000000000..085ed88ea
--- /dev/null
+++ b/src/Specific/solinas64_2e521m1/femul.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "femul.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23) // Multiplies two 10-limb operands (first: x20,x21,x19,...,x5; second: x38,x39,x37,...,x23) and stores 10 carried limbs in out[0..9]. Presumably arithmetic modulo 2^521 - 1, per the directory name -- TODO confirm.
+{ uint128_t x40 = (((uint128_t)x5 * x38) + ((0x2 * ((uint128_t)x7 * x39)) + ((0x2 * ((uint128_t)x9 * x37)) + ((0x2 * ((uint128_t)x11 * x35)) + ((0x2 * ((uint128_t)x13 * x33)) + ((0x2 * ((uint128_t)x15 * x31)) + ((0x2 * ((uint128_t)x17 * x29)) + ((0x2 * ((uint128_t)x19 * x27)) + ((0x2 * ((uint128_t)x21 * x25)) + ((uint128_t)x20 * x23)))))))))); // x40..x49 are 128-bit sums of limb products; some terms carry a factor of 2, and no other reduction constant appears
+{ uint128_t x41 = ((((uint128_t)x5 * x39) + ((0x2 * ((uint128_t)x7 * x37)) + ((0x2 * ((uint128_t)x9 * x35)) + ((0x2 * ((uint128_t)x11 * x33)) + ((0x2 * ((uint128_t)x13 * x31)) + ((0x2 * ((uint128_t)x15 * x29)) + ((0x2 * ((uint128_t)x17 * x27)) + ((0x2 * ((uint128_t)x19 * x25)) + ((uint128_t)x21 * x23))))))))) + ((uint128_t)x20 * x38));
+{ uint128_t x42 = ((((uint128_t)x5 * x37) + ((0x2 * ((uint128_t)x7 * x35)) + ((0x2 * ((uint128_t)x9 * x33)) + ((0x2 * ((uint128_t)x11 * x31)) + ((0x2 * ((uint128_t)x13 * x29)) + ((0x2 * ((uint128_t)x15 * x27)) + ((0x2 * ((uint128_t)x17 * x25)) + ((uint128_t)x19 * x23)))))))) + (((uint128_t)x21 * x38) + ((uint128_t)x20 * x39)));
+{ uint128_t x43 = ((((uint128_t)x5 * x35) + ((0x2 * ((uint128_t)x7 * x33)) + ((0x2 * ((uint128_t)x9 * x31)) + ((0x2 * ((uint128_t)x11 * x29)) + ((0x2 * ((uint128_t)x13 * x27)) + ((0x2 * ((uint128_t)x15 * x25)) + ((uint128_t)x17 * x23))))))) + (((uint128_t)x19 * x38) + (((uint128_t)x21 * x39) + ((uint128_t)x20 * x37))));
+{ uint128_t x44 = ((((uint128_t)x5 * x33) + ((0x2 * ((uint128_t)x7 * x31)) + ((0x2 * ((uint128_t)x9 * x29)) + ((0x2 * ((uint128_t)x11 * x27)) + ((0x2 * ((uint128_t)x13 * x25)) + ((uint128_t)x15 * x23)))))) + (((uint128_t)x17 * x38) + (((uint128_t)x19 * x39) + (((uint128_t)x21 * x37) + ((uint128_t)x20 * x35)))));
+{ uint128_t x45 = ((((uint128_t)x5 * x31) + ((0x2 * ((uint128_t)x7 * x29)) + ((0x2 * ((uint128_t)x9 * x27)) + ((0x2 * ((uint128_t)x11 * x25)) + ((uint128_t)x13 * x23))))) + (((uint128_t)x15 * x38) + (((uint128_t)x17 * x39) + (((uint128_t)x19 * x37) + (((uint128_t)x21 * x35) + ((uint128_t)x20 * x33))))));
+{ uint128_t x46 = ((((uint128_t)x5 * x29) + ((0x2 * ((uint128_t)x7 * x27)) + ((0x2 * ((uint128_t)x9 * x25)) + ((uint128_t)x11 * x23)))) + (((uint128_t)x13 * x38) + (((uint128_t)x15 * x39) + (((uint128_t)x17 * x37) + (((uint128_t)x19 * x35) + (((uint128_t)x21 * x33) + ((uint128_t)x20 * x31)))))));
+{ uint128_t x47 = ((((uint128_t)x5 * x27) + ((0x2 * ((uint128_t)x7 * x25)) + ((uint128_t)x9 * x23))) + (((uint128_t)x11 * x38) + (((uint128_t)x13 * x39) + (((uint128_t)x15 * x37) + (((uint128_t)x17 * x35) + (((uint128_t)x19 * x33) + (((uint128_t)x21 * x31) + ((uint128_t)x20 * x29))))))));
+{ uint128_t x48 = ((((uint128_t)x5 * x25) + ((uint128_t)x7 * x23)) + (((uint128_t)x9 * x38) + (((uint128_t)x11 * x39) + (((uint128_t)x13 * x37) + (((uint128_t)x15 * x35) + (((uint128_t)x17 * x33) + (((uint128_t)x19 * x31) + (((uint128_t)x21 * x29) + ((uint128_t)x20 * x27)))))))));
+{ uint128_t x49 = (((uint128_t)x5 * x23) + ((0x2 * ((uint128_t)x7 * x38)) + ((0x2 * ((uint128_t)x9 * x39)) + ((0x2 * ((uint128_t)x11 * x37)) + ((0x2 * ((uint128_t)x13 * x35)) + ((0x2 * ((uint128_t)x15 * x33)) + ((0x2 * ((uint128_t)x17 * x31)) + ((0x2 * ((uint128_t)x19 * x29)) + ((0x2 * ((uint128_t)x21 * x27)) + (0x2 * ((uint128_t)x20 * x25))))))))))); // x49 holds x5*x23 and is where the carry chain below starts
+{ uint64_t x50 = (uint64_t) (x49 >> 0x35); // carry chain: each sum is split into a carry (bits above 53 or 52) and a masked limb; carries are narrowed to 64 bits. NOTE(review): presumably justified by input bounds not visible here -- TODO confirm
+{ uint64_t x51 = ((uint64_t)x49 & 0x1fffffffffffff);
+{ uint128_t x52 = (x50 + x48);
+{ uint64_t x53 = (uint64_t) (x52 >> 0x34);
+{ uint64_t x54 = ((uint64_t)x52 & 0xfffffffffffff);
+{ uint128_t x55 = (x53 + x47);
+{ uint64_t x56 = (uint64_t) (x55 >> 0x34);
+{ uint64_t x57 = ((uint64_t)x55 & 0xfffffffffffff);
+{ uint128_t x58 = (x56 + x46);
+{ uint64_t x59 = (uint64_t) (x58 >> 0x34);
+{ uint64_t x60 = ((uint64_t)x58 & 0xfffffffffffff);
+{ uint128_t x61 = (x59 + x45);
+{ uint64_t x62 = (uint64_t) (x61 >> 0x34);
+{ uint64_t x63 = ((uint64_t)x61 & 0xfffffffffffff);
+{ uint128_t x64 = (x62 + x44);
+{ uint64_t x65 = (uint64_t) (x64 >> 0x34);
+{ uint64_t x66 = ((uint64_t)x64 & 0xfffffffffffff);
+{ uint128_t x67 = (x65 + x43);
+{ uint64_t x68 = (uint64_t) (x67 >> 0x34);
+{ uint64_t x69 = ((uint64_t)x67 & 0xfffffffffffff);
+{ uint128_t x70 = (x68 + x42);
+{ uint64_t x71 = (uint64_t) (x70 >> 0x34);
+{ uint64_t x72 = ((uint64_t)x70 & 0xfffffffffffff);
+{ uint128_t x73 = (x71 + x41);
+{ uint64_t x74 = (uint64_t) (x73 >> 0x34);
+{ uint64_t x75 = ((uint64_t)x73 & 0xfffffffffffff);
+{ uint128_t x76 = (x74 + x40);
+{ uint64_t x77 = (uint64_t) (x76 >> 0x34);
+{ uint64_t x78 = ((uint64_t)x76 & 0xfffffffffffff);
+{ uint64_t x79 = (x51 + x77); // the carry out of the last sum is added straight back into the first limb, with no scaling
+{ uint64_t x80 = (x79 >> 0x35);
+{ uint64_t x81 = (x79 & 0x1fffffffffffff);
+{ uint64_t x82 = (x80 + x54);
+{ uint64_t x83 = (x82 >> 0x34);
+{ uint64_t x84 = (x82 & 0xfffffffffffff);
+out[0] = x78; // out[] runs from the x40-derived limb (out[0]) to the x49-derived limb (out[9])
+out[1] = x75;
+out[2] = x72;
+out[3] = x69;
+out[4] = x66;
+out[5] = x63;
+out[6] = x60;
+out[7] = x83 + x57; // the final carry x83 is added here without a further mask
+out[8] = x84;
+out[9] = x81;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e521m1/femul.h b/src/Specific/solinas64_2e521m1/femul.h
new file mode 100644
index 000000000..41b9c659a
--- /dev/null
+++ b/src/Specific/solinas64_2e521m1/femul.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline femul(uint64_t* out, uint64_t x20, uint64_t x21, uint64_t x19, uint64_t x17, uint64_t x15, uint64_t x13, uint64_t x11, uint64_t x9, uint64_t x7, uint64_t x5, uint64_t x38, uint64_t x39, uint64_t x37, uint64_t x35, uint64_t x33, uint64_t x31, uint64_t x29, uint64_t x27, uint64_t x25, uint64_t x23); // Prototype for the femul definition in this directory's femul.c: one output pointer plus two groups of ten uint64_t limbs passed by value. NOTE(review): this header has no include guard.
diff --git a/src/Specific/solinas64_2e521m1/fesquare.c b/src/Specific/solinas64_2e521m1/fesquare.c
new file mode 100644
index 000000000..280bf114e
--- /dev/null
+++ b/src/Specific/solinas64_2e521m1/fesquare.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "fesquare.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // Squares one 10-limb operand (x17,x18,x16,...,x2) and stores 10 carried limbs in out[0..9]. Presumably arithmetic modulo 2^521 - 1, per the directory name -- TODO confirm.
+{ uint128_t x19 = (((uint128_t)x2 * x17) + ((0x2 * ((uint128_t)x4 * x18)) + ((0x2 * ((uint128_t)x6 * x16)) + ((0x2 * ((uint128_t)x8 * x14)) + ((0x2 * ((uint128_t)x10 * x12)) + ((0x2 * ((uint128_t)x12 * x10)) + ((0x2 * ((uint128_t)x14 * x8)) + ((0x2 * ((uint128_t)x16 * x6)) + ((0x2 * ((uint128_t)x18 * x4)) + ((uint128_t)x17 * x2)))))))))); // x19..x28 are 128-bit sums of limb products; symmetric terms (e.g. x10*x12 and x12*x10) are written out separately rather than merged
+{ uint128_t x20 = ((((uint128_t)x2 * x18) + ((0x2 * ((uint128_t)x4 * x16)) + ((0x2 * ((uint128_t)x6 * x14)) + ((0x2 * ((uint128_t)x8 * x12)) + ((0x2 * ((uint128_t)x10 * x10)) + ((0x2 * ((uint128_t)x12 * x8)) + ((0x2 * ((uint128_t)x14 * x6)) + ((0x2 * ((uint128_t)x16 * x4)) + ((uint128_t)x18 * x2))))))))) + ((uint128_t)x17 * x17));
+{ uint128_t x21 = ((((uint128_t)x2 * x16) + ((0x2 * ((uint128_t)x4 * x14)) + ((0x2 * ((uint128_t)x6 * x12)) + ((0x2 * ((uint128_t)x8 * x10)) + ((0x2 * ((uint128_t)x10 * x8)) + ((0x2 * ((uint128_t)x12 * x6)) + ((0x2 * ((uint128_t)x14 * x4)) + ((uint128_t)x16 * x2)))))))) + (((uint128_t)x18 * x17) + ((uint128_t)x17 * x18)));
+{ uint128_t x22 = ((((uint128_t)x2 * x14) + ((0x2 * ((uint128_t)x4 * x12)) + ((0x2 * ((uint128_t)x6 * x10)) + ((0x2 * ((uint128_t)x8 * x8)) + ((0x2 * ((uint128_t)x10 * x6)) + ((0x2 * ((uint128_t)x12 * x4)) + ((uint128_t)x14 * x2))))))) + (((uint128_t)x16 * x17) + (((uint128_t)x18 * x18) + ((uint128_t)x17 * x16))));
+{ uint128_t x23 = ((((uint128_t)x2 * x12) + ((0x2 * ((uint128_t)x4 * x10)) + ((0x2 * ((uint128_t)x6 * x8)) + ((0x2 * ((uint128_t)x8 * x6)) + ((0x2 * ((uint128_t)x10 * x4)) + ((uint128_t)x12 * x2)))))) + (((uint128_t)x14 * x17) + (((uint128_t)x16 * x18) + (((uint128_t)x18 * x16) + ((uint128_t)x17 * x14)))));
+{ uint128_t x24 = ((((uint128_t)x2 * x10) + ((0x2 * ((uint128_t)x4 * x8)) + ((0x2 * ((uint128_t)x6 * x6)) + ((0x2 * ((uint128_t)x8 * x4)) + ((uint128_t)x10 * x2))))) + (((uint128_t)x12 * x17) + (((uint128_t)x14 * x18) + (((uint128_t)x16 * x16) + (((uint128_t)x18 * x14) + ((uint128_t)x17 * x12))))));
+{ uint128_t x25 = ((((uint128_t)x2 * x8) + ((0x2 * ((uint128_t)x4 * x6)) + ((0x2 * ((uint128_t)x6 * x4)) + ((uint128_t)x8 * x2)))) + (((uint128_t)x10 * x17) + (((uint128_t)x12 * x18) + (((uint128_t)x14 * x16) + (((uint128_t)x16 * x14) + (((uint128_t)x18 * x12) + ((uint128_t)x17 * x10)))))));
+{ uint128_t x26 = ((((uint128_t)x2 * x6) + ((0x2 * ((uint128_t)x4 * x4)) + ((uint128_t)x6 * x2))) + (((uint128_t)x8 * x17) + (((uint128_t)x10 * x18) + (((uint128_t)x12 * x16) + (((uint128_t)x14 * x14) + (((uint128_t)x16 * x12) + (((uint128_t)x18 * x10) + ((uint128_t)x17 * x8))))))));
+{ uint128_t x27 = ((((uint128_t)x2 * x4) + ((uint128_t)x4 * x2)) + (((uint128_t)x6 * x17) + (((uint128_t)x8 * x18) + (((uint128_t)x10 * x16) + (((uint128_t)x12 * x14) + (((uint128_t)x14 * x12) + (((uint128_t)x16 * x10) + (((uint128_t)x18 * x8) + ((uint128_t)x17 * x6)))))))));
+{ uint128_t x28 = (((uint128_t)x2 * x2) + ((0x2 * ((uint128_t)x4 * x17)) + ((0x2 * ((uint128_t)x6 * x18)) + ((0x2 * ((uint128_t)x8 * x16)) + ((0x2 * ((uint128_t)x10 * x14)) + ((0x2 * ((uint128_t)x12 * x12)) + ((0x2 * ((uint128_t)x14 * x10)) + ((0x2 * ((uint128_t)x16 * x8)) + ((0x2 * ((uint128_t)x18 * x6)) + (0x2 * ((uint128_t)x17 * x4))))))))))); // x28 holds x2*x2 and is where the carry chain below starts
+{ uint64_t x29 = (uint64_t) (x28 >> 0x35); // carry chain: each sum is split into a carry (bits above 53 or 52) and a masked limb; carries are narrowed to 64 bits. NOTE(review): presumably justified by input bounds not visible here -- TODO confirm
+{ uint64_t x30 = ((uint64_t)x28 & 0x1fffffffffffff);
+{ uint128_t x31 = (x29 + x27);
+{ uint64_t x32 = (uint64_t) (x31 >> 0x34);
+{ uint64_t x33 = ((uint64_t)x31 & 0xfffffffffffff);
+{ uint128_t x34 = (x32 + x26);
+{ uint64_t x35 = (uint64_t) (x34 >> 0x34);
+{ uint64_t x36 = ((uint64_t)x34 & 0xfffffffffffff);
+{ uint128_t x37 = (x35 + x25);
+{ uint64_t x38 = (uint64_t) (x37 >> 0x34);
+{ uint64_t x39 = ((uint64_t)x37 & 0xfffffffffffff);
+{ uint128_t x40 = (x38 + x24);
+{ uint64_t x41 = (uint64_t) (x40 >> 0x34);
+{ uint64_t x42 = ((uint64_t)x40 & 0xfffffffffffff);
+{ uint128_t x43 = (x41 + x23);
+{ uint64_t x44 = (uint64_t) (x43 >> 0x34);
+{ uint64_t x45 = ((uint64_t)x43 & 0xfffffffffffff);
+{ uint128_t x46 = (x44 + x22);
+{ uint64_t x47 = (uint64_t) (x46 >> 0x34);
+{ uint64_t x48 = ((uint64_t)x46 & 0xfffffffffffff);
+{ uint128_t x49 = (x47 + x21);
+{ uint64_t x50 = (uint64_t) (x49 >> 0x34);
+{ uint64_t x51 = ((uint64_t)x49 & 0xfffffffffffff);
+{ uint128_t x52 = (x50 + x20);
+{ uint64_t x53 = (uint64_t) (x52 >> 0x34);
+{ uint64_t x54 = ((uint64_t)x52 & 0xfffffffffffff);
+{ uint128_t x55 = (x53 + x19);
+{ uint64_t x56 = (uint64_t) (x55 >> 0x34);
+{ uint64_t x57 = ((uint64_t)x55 & 0xfffffffffffff);
+{ uint64_t x58 = (x30 + x56); // the carry out of the last sum is added straight back into the first limb, with no scaling
+{ uint64_t x59 = (x58 >> 0x35);
+{ uint64_t x60 = (x58 & 0x1fffffffffffff);
+{ uint64_t x61 = (x59 + x33);
+{ uint64_t x62 = (x61 >> 0x34);
+{ uint64_t x63 = (x61 & 0xfffffffffffff);
+out[0] = x57; // out[] runs from the x19-derived limb (out[0]) to the x28-derived limb (out[9])
+out[1] = x54;
+out[2] = x51;
+out[3] = x48;
+out[4] = x45;
+out[5] = x42;
+out[6] = x39;
+out[7] = x62 + x36; // the final carry x62 is added here without a further mask
+out[8] = x63;
+out[9] = x60;
+}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
+// caller: uint64_t out[10];
diff --git a/src/Specific/solinas64_2e521m1/fesquare.h b/src/Specific/solinas64_2e521m1/fesquare.h
new file mode 100644
index 000000000..1005ebb62
--- /dev/null
+++ b/src/Specific/solinas64_2e521m1/fesquare.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline fesquare(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the fesquare definition in this directory's fesquare.c: one output pointer plus ten uint64_t limbs passed by value. NOTE(review): this header has no include guard.
diff --git a/src/Specific/solinas64_2e521m1/freeze.c b/src/Specific/solinas64_2e521m1/freeze.c
new file mode 100644
index 000000000..24db516df
--- /dev/null
+++ b/src/Specific/solinas64_2e521m1/freeze.c
@@ -0,0 +1,25 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include "liblow.h"
+
+#include "freeze.h"
+
+typedef unsigned int uint128_t __attribute__((mode(TI)));
+
+#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__)||defined(__INTEL_COMPILER))
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+#define _subborrow_u32 __builtin_ia32_sbb_u32
+#define _subborrow_u64 __builtin_ia32_sbb_u64
+#endif
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2) // NOTE(review): the body below is not valid C (no opening brace follows this signature); it looks like un-lowered generator output, so regenerate rather than hand-edit -- TODO confirm with the generator.
+out[0] = uint64_t x20; // NOTE(review): a declaration appears in expression position; x20 is never given a value.
+out[1] = uint8_t x21 = Op Syntax.SubWithGetBorrow 53 Syntax.TWord 3 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 6 Syntax.TWord 3 0x0; // NOTE(review): "Op Syntax.SubWithGetBorrow ..." is not C; presumably an operation the printer could not render -- TODO confirm.
+out[2] = x2; // only x2 of the ten limb parameters is referenced in this body
+out[3] = 0x1fffffffffffff;; // constant with the low 53 bits set; the doubled ';' is in the generated text
+}
+// caller: uint64_t out[4];
diff --git a/src/Specific/solinas64_2e521m1/freeze.h b/src/Specific/solinas64_2e521m1/freeze.h
new file mode 100644
index 000000000..b674f66c9
--- /dev/null
+++ b/src/Specific/solinas64_2e521m1/freeze.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+void force_inline freeze(uint64_t* out, uint64_t x17, uint64_t x18, uint64_t x16, uint64_t x14, uint64_t x12, uint64_t x10, uint64_t x8, uint64_t x6, uint64_t x4, uint64_t x2); // Prototype for the freeze definition in this directory's freeze.c: one output pointer plus ten uint64_t limbs passed by value. NOTE(review): this header has no include guard.