aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore8
-rw-r--r--Makefile21
-rw-r--r--_CoqProject4
-rwxr-xr-xextract-function-header.sh9
-rwxr-xr-xextract-function.sh9
-rw-r--r--src/Specific/X25519/C32/femul.v100
-rw-r--r--src/Specific/X25519/C32/femulDisplay.log88
-rw-r--r--src/Specific/X25519/C32/femulDisplay.v4
-rw-r--r--src/Specific/X25519/C32/fesquare.v100
-rw-r--r--src/Specific/X25519/C32/fesquareDisplay.log88
-rw-r--r--src/Specific/X25519/C32/fesquareDisplay.v4
11 files changed, 423 insertions, 12 deletions
diff --git a/.gitignore b/.gitignore
index ebdeb9128..e9cd586b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,6 +47,14 @@ src/Specific/X25519/C64/ladderstep.c
src/Specific/X25519/C64/ladderstep.h
src/Specific/X25519/C64/measure
src/Specific/X25519/C64/test
+src/Specific/X25519/C32/femul.c
+src/Specific/X25519/C32/femul.h
+src/Specific/X25519/C32/fesquare.c
+src/Specific/X25519/C32/fesquare.h
+src/Specific/X25519/C32/ladderstep.c
+src/Specific/X25519/C32/ladderstep.h
+src/Specific/X25519/C32/measure
+src/Specific/X25519/C32/test
third_party/openssl-curve25519/measure
third_party/openssl-nistp256c64/measure
third_party/openssl-nistz256-adx/measure
diff --git a/Makefile b/Makefile
index 4a37bece3..3bf61d508 100644
--- a/Makefile
+++ b/Makefile
@@ -141,18 +141,27 @@ $(DISPLAY_NON_JAVA_VO:.vo=.log) : %Display.log : %.vo %Display.v src/Compilers/Z
c: $(DISPLAY_NON_JAVA_VO:Display.vo=.c) $(DISPLAY_NON_JAVA_VO:Display.vo=.h)
-$(DISPLAY_NON_JAVA_VO:Display.vo=.c) : %.c : %Display.log extract-function.sh
- FIAT_CRYPTO_EXTRACT_FUNCTION_IS_ASM="" ./extract-function.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@
+DISPLAY_X25519_C64_VO := $(filter src/Specific/X25519/C64/%,$(DISPLAY_NON_JAVA_VO))
+DISPLAY_X25519_C32_VO := $(filter src/Specific/X25519/C32/%,$(DISPLAY_NON_JAVA_VO))
+DISPLAY_NON_JAVA_C32_VO := $(DISPLAY_X25519_C32_VO)
+DISPLAY_NON_JAVA_C64_VO := $(filter-out $(DISPLAY_NON_JAVA_C32_VO),$(DISPLAY_NON_JAVA_VO))
+
+$(DISPLAY_NON_JAVA_C64_VO:Display.vo=.c) : %.c : %Display.log extract-function.sh
+ BITWIDTH=64 FIAT_CRYPTO_EXTRACT_FUNCTION_IS_ASM="" ./extract-function.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@
+
+$(DISPLAY_NON_JAVA_C32_VO:Display.vo=.c) : %.c : %Display.log extract-function.sh
+ BITWIDTH=32 FIAT_CRYPTO_EXTRACT_FUNCTION_IS_ASM="" ./extract-function.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@
-$(DISPLAY_NON_JAVA_VO:Display.vo=.h) : %.h : %Display.log extract-function-header.sh
- ./extract-function-header.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@
+$(DISPLAY_NON_JAVA_C64_VO:Display.vo=.h) : %.h : %Display.log extract-function-header.sh
+ BITWIDTH=64 ./extract-function-header.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@
+
+$(DISPLAY_NON_JAVA_C32_VO:Display.vo=.h) : %.h : %Display.log extract-function-header.sh
+ BITWIDTH=32 ./extract-function-header.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@
$(DISPLAY_JAVA_VO:.vo=.log) : %JavaDisplay.log : %.vo %JavaDisplay.v src/Compilers/Z/JavaNotations.vo src/Specific/IntegrationTestDisplayCommon.vo
$(SHOW)"COQC $*JavaDisplay > $@"
$(HIDE)$(COQC) $(COQDEBUG) $(COQFLAGS) $*JavaDisplay.v | sed s'/\r\n/\n/g' > $@.tmp && mv -f $@.tmp $@
-DISPLAY_X25519_C64_VO := $(filter src/Specific/X25519/C64/%,$(DISPLAY_NON_JAVA_VO))
-
src/Specific/X25519/C64/test: src/Specific/X25519/C64/compiler.sh src/Specific/X25519/x25519_test.c $(DISPLAY_X25519_C64_VO:Display.vo=.c) $(DISPLAY_X25519_C64_VO:Display.vo=.h) src/Specific/X25519/C64/scalarmult.c
src/Specific/X25519/C64/compiler.sh -o src/Specific/X25519/C64/test -I liblow -I src/Specific/X25519/C64/ src/Specific/X25519/x25519_test.c $(DISPLAY_X25519_C64_VO:Display.vo=.c) src/Specific/X25519/C64/scalarmult.c
diff --git a/_CoqProject b/_CoqProject
index a041542cf..1475ca1d9 100644
--- a/_CoqProject
+++ b/_CoqProject
@@ -261,6 +261,10 @@ src/Specific/NISTP256/AMD64/fesubDisplay.v
src/Specific/NISTP256/FancyMachine256/Barrett.v
src/Specific/NISTP256/FancyMachine256/Core.v
src/Specific/NISTP256/FancyMachine256/Montgomery.v
+src/Specific/X25519/C32/femul.v
+src/Specific/X25519/C32/femulDisplay.v
+src/Specific/X25519/C32/fesquare.v
+src/Specific/X25519/C32/fesquareDisplay.v
src/Specific/X25519/C64/femul.v
src/Specific/X25519/C64/femulDisplay.v
src/Specific/X25519/C64/fesquare.v
diff --git a/extract-function-header.sh b/extract-function-header.sh
index 3535e3c37..36676db06 100755
--- a/extract-function-header.sh
+++ b/extract-function-header.sh
@@ -21,18 +21,21 @@ cat <<"EOF"
EOF
+if [ -z "${BITWIDTH}" ]; then
+ BITWIDTH=64
+fi
+
while IFS= read -r line; do
case "$line" in
*"λ '"*)
echo -n "void force_inline $funcname("
- echo -n "uint64_t* out"
+ echo -n "uint${BITWIDTH}_t* out"
echo "$line" | grep -owP -- '\w+\d+' | \
while IFS= read -r arg; do
- echo -n ", uint64_t $arg"
+ echo -n ", uint${BITWIDTH}_t $arg"
done
echo ');'
break
;;
esac
done
-
diff --git a/extract-function.sh b/extract-function.sh
index 8653c4b32..024dda40f 100755
--- a/extract-function.sh
+++ b/extract-function.sh
@@ -44,14 +44,17 @@ if [ ! -z "${FIAT_CRYPTO_EXTRACT_FUNCTION_IS_ASM}" ]; then
brace=''
close_brace=''
fi
+if [ -z "${BITWIDTH}" ]; then
+ BITWIDTH=64
+fi
while IFS= read -r line; do
case "$line" in
*"λ '"*)
echo -n "void force_inline $funcname("
- echo -n "uint64_t* out"
+ echo -n "uint${BITWIDTH}_t* out"
echo "$line" | grep -owP -- '\w+\d+' | \
while IFS= read -r arg; do
- echo -n ", uint64_t $arg"
+ echo -n ", uint${BITWIDTH}_t $arg"
done
echo -n ')'
echo "${function_open_brace}"
@@ -69,7 +72,7 @@ while IFS= read -r line; do
echo -n "${close_brace}"
done
echo "}"
- echo "// caller: uint64_t out[$i];" )
+ echo "// caller: uint${BITWIDTH}_t out[$i];" )
show=false
break
;;
diff --git a/src/Specific/X25519/C32/femul.v b/src/Specific/X25519/C32/femul.v
new file mode 100644
index 000000000..19959f19e
--- /dev/null
+++ b/src/Specific/X25519/C32/femul.v
@@ -0,0 +1,100 @@
+Require Import Coq.ZArith.ZArith.
+Require Import Coq.Lists.List.
+Local Open Scope Z_scope.
+
+Require Import Crypto.Arithmetic.Core.
+Require Import Crypto.Util.FixedWordSizes.
+Require Import Crypto.Specific.ArithmeticSynthesisTest32.
+Require Import Crypto.Arithmetic.PrimeFieldTheorems.
+Require Import Crypto.Util.Tuple Crypto.Util.Sigma Crypto.Util.Sigma.MapProjections Crypto.Util.Sigma.Lift Crypto.Util.Notations Crypto.Util.ZRange Crypto.Util.BoundedWord.
+Require Import Crypto.Util.Tactics.Head.
+Require Import Crypto.Util.Tactics.MoveLetIn.
+Import ListNotations.
+
+Require Import Crypto.Specific.IntegrationTestTemporaryMiscCommon.
+
+Require Import Crypto.Compilers.Z.Bounds.Pipeline.
+
+Section BoundedField25p5.
+ Local Coercion Z.of_nat : nat >-> Z.
+
+ Let limb_widths := Eval vm_compute in (List.map (fun i => Z.log2 (wt (S i) / wt i)) (seq 0 sz)).
+ Let length_lw := Eval compute in List.length limb_widths.
+
+ Local Notation b_of exp := {| lower := 0 ; upper := 2^exp + 2^(exp-3) |}%Z (only parsing). (* max is [(0, 2^(exp+2) + 2^exp + 2^(exp-1) + 2^(exp-3) + 2^(exp-4) + 2^(exp-5) + 2^(exp-6) + 2^(exp-10) + 2^(exp-12) + 2^(exp-13) + 2^(exp-14) + 2^(exp-15) + 2^(exp-17) + 2^(exp-23) + 2^(exp-24))%Z] *)
+ (* The definition [bounds_exp] is a tuple-version of the
+ limb-widths, which are the [exp] argument in [b_of] above, i.e.,
+ the approximate base-2 exponent of the bounds on the limb in that
+ position. *)
+ Let bounds_exp : Tuple.tuple Z length_lw
+ := Eval compute in
+ Tuple.from_list length_lw limb_widths eq_refl.
+ Let bounds : Tuple.tuple zrange length_lw
+ := Eval compute in
+ Tuple.map (fun e => b_of e) bounds_exp.
+
+ Let lgbitwidth := Eval compute in (Z.to_nat (Z.log2_up (List.fold_right Z.max 0 limb_widths))).
+ Let bitwidth := Eval compute in (2^lgbitwidth)%nat.
+ Let feZ : Type := tuple Z sz.
+ Let feW : Type := tuple (wordT lgbitwidth) sz.
+ Let feBW : Type := BoundedWord sz bitwidth bounds.
+ Let phi : feBW -> F m :=
+ fun x => B.Positional.Fdecode wt (BoundedWordToZ _ _ _ x).
+
+ (* TODO : change this to field once field isomorphism happens *)
+ Definition mul :
+ { mul : feBW -> feBW -> feBW
+ | forall a b, phi (mul a b) = F.mul (phi a) (phi b) }.
+ Proof.
+ start_preglue.
+ do_rewrite_with_2sig_add_carry mul_sig carry_sig; cbv_runtime.
+ all:fin_preglue.
+ (* jgross start here! *)
+ (*Set Ltac Profiling.*)
+ Time refine_reflectively32. (* Finished transaction in 19.348 secs (19.284u,0.036s) (successful) *)
+ (*Show Ltac Profile.*)
+ (* total time: 19.632s
+
+ tactic local total calls max
+────────────────────────────────────────┴──────┴──────┴───────┴─────────┘
+─refine_reflectively ------------------- 0.0% 98.4% 1 19.320s
+─ReflectiveTactics.do_reflective_pipelin -0.0% 96.2% 1 18.884s
+─ReflectiveTactics.solve_side_conditions 1.2% 96.1% 1 18.860s
+─ReflectiveTactics.do_reify ------------ 27.7% 44.0% 1 8.640s
+─ReflectiveTactics.abstract_vm_compute_r 12.3% 13.9% 2 2.024s
+─ReflectiveTactics.abstract_vm_compute_r 8.9% 12.2% 2 1.576s
+─Reify_rhs_gen ------------------------- 0.8% 11.7% 1 2.300s
+─ReflectiveTactics.renamify_rhs -------- 10.4% 11.5% 1 2.260s
+─ReflectiveTactics.abstract_rhs -------- 4.6% 5.8% 1 1.148s
+─clear (var_list) ---------------------- 5.2% 5.2% 57 0.184s
+─eexact -------------------------------- 4.1% 4.1% 68 0.028s
+─prove_interp_compile_correct ---------- 0.0% 3.4% 1 0.664s
+─ReflectiveTactics.abstract_cbv_interp_r 2.7% 3.3% 1 0.648s
+─unify (constr) (constr) --------------- 3.2% 3.2% 6 0.248s
+─rewrite ?EtaInterp.InterpExprEta ------ 3.1% 3.1% 1 0.612s
+─ReflectiveTactics.abstract_cbv_rhs ---- 2.0% 2.7% 1 0.532s
+─Glue.refine_to_reflective_glue -------- 0.0% 2.2% 1 0.436s
+─rewrite H ----------------------------- 2.1% 2.1% 1 0.420s
+
+ tactic local total calls max
+────────────────────────────────────────┴──────┴──────┴───────┴─────────┘
+─refine_reflectively ------------------- 0.0% 98.4% 1 19.320s
+ ├─ReflectiveTactics.do_reflective_pipel -0.0% 96.2% 1 18.884s
+ │└ReflectiveTactics.solve_side_conditio 1.2% 96.1% 1 18.860s
+ │ ├─ReflectiveTactics.do_reify -------- 27.7% 44.0% 1 8.640s
+ │ │ ├─Reify_rhs_gen ------------------- 0.8% 11.7% 1 2.300s
+ │ │ │ ├─prove_interp_compile_correct -- 0.0% 3.4% 1 0.664s
+ │ │ │ │└rewrite ?EtaInterp.InterpExprEt 3.1% 3.1% 1 0.612s
+ │ │ │ └─rewrite H --------------------- 2.1% 2.1% 1 0.420s
+ │ │ └─eexact -------------------------- 4.1% 4.1% 68 0.028s
+ │ ├─ReflectiveTactics.abstract_vm_compu 12.3% 13.9% 2 2.024s
+ │ ├─ReflectiveTactics.abstract_vm_compu 8.9% 12.2% 2 1.576s
+ │ ├─ReflectiveTactics.renamify_rhs ---- 10.4% 11.5% 1 2.260s
+ │ ├─ReflectiveTactics.abstract_rhs ---- 4.6% 5.8% 1 1.148s
+ │ ├─ReflectiveTactics.abstract_cbv_inte 2.7% 3.3% 1 0.648s
+ │ └─ReflectiveTactics.abstract_cbv_rhs 2.0% 2.7% 1 0.532s
+ └─Glue.refine_to_reflective_glue ------ 0.0% 2.2% 1 0.436s
+*)
+ Time Defined. (* Finished transaction in 10.167 secs (10.123u,0.023s) (successful) *)
+
+End BoundedField25p5.
diff --git a/src/Specific/X25519/C32/femulDisplay.log b/src/Specific/X25519/C32/femulDisplay.log
new file mode 100644
index 000000000..12fa54d8a
--- /dev/null
+++ b/src/Specific/X25519/C32/femulDisplay.log
@@ -0,0 +1,88 @@
+λ x x0 : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x20, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x38, x39, x37, x35, x33, x31, x29, x27, x25, x23))%core,
+ uint64_t x40 = (uint64_t) x23 * x5;
+ uint64_t x41 = (uint64_t) x23 * x7 + (uint64_t) x25 * x5;
+ uint64_t x42 = (uint64_t) (0x2 * x25) * x7 + (uint64_t) x23 * x9 + (uint64_t) x27 * x5;
+ uint64_t x43 = (uint64_t) x25 * x9 + (uint64_t) x27 * x7 + (uint64_t) x23 * x11 + (uint64_t) x29 * x5;
+ uint64_t x44 = (uint64_t) x27 * x9 + 0x2 * ((uint64_t) x25 * x11 + (uint64_t) x29 * x7) + (uint64_t) x23 * x13 + (uint64_t) x31 * x5;
+ uint64_t x45 = (uint64_t) x27 * x11 + (uint64_t) x29 * x9 + (uint64_t) x25 * x13 + (uint64_t) x31 * x7 + (uint64_t) x23 * x15 + (uint64_t) x33 * x5;
+ uint64_t x46 = 0x2 * ((uint64_t) x29 * x11 + (uint64_t) x25 * x15 + (uint64_t) x33 * x7) + (uint64_t) x27 * x13 + (uint64_t) x31 * x9 + (uint64_t) x23 * x17 + (uint64_t) x35 * x5;
+ uint64_t x47 = (uint64_t) x29 * x13 + (uint64_t) x31 * x11 + (uint64_t) x27 * x15 + (uint64_t) x33 * x9 + (uint64_t) x25 * x17 + (uint64_t) x35 * x7 + (uint64_t) x23 * x19 + (uint64_t) x37 * x5;
+ uint64_t x48 = (uint64_t) x31 * x13 + 0x2 * ((uint64_t) x29 * x15 + (uint64_t) x33 * x11 + (uint64_t) x25 * x19 + (uint64_t) x37 * x7) + (uint64_t) x27 * x17 + (uint64_t) x35 * x9 + (uint64_t) x23 * x21 + (uint64_t) x39 * x5;
+ uint64_t x49 = (uint64_t) x31 * x15 + (uint64_t) x33 * x13 + (uint64_t) x29 * x17 + (uint64_t) x35 * x11 + (uint64_t) x27 * x19 + (uint64_t) x37 * x9 + (uint64_t) x25 * x21 + (uint64_t) x39 * x7 + (uint64_t) x23 * x20 + (uint64_t) x38 * x5;
+ uint64_t x50 = 0x2 * ((uint64_t) x33 * x15 + (uint64_t) x29 * x19 + (uint64_t) x37 * x11 + (uint64_t) x25 * x20 + (uint64_t) x38 * x7) + (uint64_t) x31 * x17 + (uint64_t) x35 * x13 + (uint64_t) x27 * x21 + (uint64_t) x39 * x9;
+ uint64_t x51 = (uint64_t) x33 * x17 + (uint64_t) x35 * x15 + (uint64_t) x31 * x19 + (uint64_t) x37 * x13 + (uint64_t) x29 * x21 + (uint64_t) x39 * x11 + (uint64_t) x27 * x20 + (uint64_t) x38 * x9;
+ uint64_t x52 = (uint64_t) x35 * x17 + 0x2 * ((uint64_t) x33 * x19 + (uint64_t) x37 * x15 + (uint64_t) x29 * x20 + (uint64_t) x38 * x11) + (uint64_t) x31 * x21 + (uint64_t) x39 * x13;
+ uint64_t x53 = (uint64_t) x35 * x19 + (uint64_t) x37 * x17 + (uint64_t) x33 * x21 + (uint64_t) x39 * x15 + (uint64_t) x31 * x20 + (uint64_t) x38 * x13;
+ uint64_t x54 = 0x2 * ((uint64_t) x37 * x19 + (uint64_t) x33 * x20 + (uint64_t) x38 * x15) + (uint64_t) x35 * x21 + (uint64_t) x39 * x17;
+ uint64_t x55 = (uint64_t) x37 * x21 + (uint64_t) x39 * x19 + (uint64_t) x35 * x20 + (uint64_t) x38 * x17;
+ uint64_t x56 = (uint64_t) x39 * x21 + 0x2 * ((uint64_t) x37 * x20 + (uint64_t) x38 * x19);
+ uint64_t x57 = (uint64_t) x39 * x20 + (uint64_t) x38 * x21;
+ uint64_t x58 = (uint64_t) (0x2 * x38) * x20;
+ uint64_t x59 = x48 + x58 << 0x4;
+ uint64_t x60 = x59 + x58 << 0x1;
+ uint64_t x61 = x60 + x58;
+ uint64_t x62 = x47 + x57 << 0x4;
+ uint64_t x63 = x62 + x57 << 0x1;
+ uint64_t x64 = x63 + x57;
+ uint64_t x65 = x46 + x56 << 0x4;
+ uint64_t x66 = x65 + x56 << 0x1;
+ uint64_t x67 = x66 + x56;
+ uint64_t x68 = x45 + x55 << 0x4;
+ uint64_t x69 = x68 + x55 << 0x1;
+ uint64_t x70 = x69 + x55;
+ uint64_t x71 = x44 + x54 << 0x4;
+ uint64_t x72 = x71 + x54 << 0x1;
+ uint64_t x73 = x72 + x54;
+ uint64_t x74 = x43 + x53 << 0x4;
+ uint64_t x75 = x74 + x53 << 0x1;
+ uint64_t x76 = x75 + x53;
+ uint64_t x77 = x42 + x52 << 0x4;
+ uint64_t x78 = x77 + x52 << 0x1;
+ uint64_t x79 = x78 + x52;
+ uint64_t x80 = x41 + x51 << 0x4;
+ uint64_t x81 = x80 + x51 << 0x1;
+ uint64_t x82 = x81 + x51;
+ uint64_t x83 = x40 + x50 << 0x4;
+ uint64_t x84 = x83 + x50 << 0x1;
+ uint64_t x85 = x84 + x50;
+ uint64_t x86 = x85 >> 0x1a;
+ uint32_t x87 = (uint32_t) x85 & 0x3ffffff;
+ uint64_t x88 = x86 + x82;
+ uint64_t x89 = x88 >> 0x19;
+ uint32_t x90 = (uint32_t) x88 & 0x1ffffff;
+ uint64_t x91 = x89 + x79;
+ uint64_t x92 = x91 >> 0x1a;
+ uint32_t x93 = (uint32_t) x91 & 0x3ffffff;
+ uint64_t x94 = x92 + x76;
+ uint64_t x95 = x94 >> 0x19;
+ uint32_t x96 = (uint32_t) x94 & 0x1ffffff;
+ uint64_t x97 = x95 + x73;
+ uint64_t x98 = x97 >> 0x1a;
+ uint32_t x99 = (uint32_t) x97 & 0x3ffffff;
+ uint64_t x100 = x98 + x70;
+ uint64_t x101 = x100 >> 0x19;
+ uint32_t x102 = (uint32_t) x100 & 0x1ffffff;
+ uint64_t x103 = x101 + x67;
+ uint32_t x104 = (uint32_t) (x103 >> 0x1a);
+ uint32_t x105 = (uint32_t) x103 & 0x3ffffff;
+ uint64_t x106 = x104 + x64;
+ uint32_t x107 = (uint32_t) (x106 >> 0x19);
+ uint32_t x108 = (uint32_t) x106 & 0x1ffffff;
+ uint64_t x109 = x107 + x61;
+ uint32_t x110 = (uint32_t) (x109 >> 0x1a);
+ uint32_t x111 = (uint32_t) x109 & 0x3ffffff;
+ uint64_t x112 = x110 + x49;
+ uint32_t x113 = (uint32_t) (x112 >> 0x19);
+ uint32_t x114 = (uint32_t) x112 & 0x1ffffff;
+ uint64_t x115 = x87 + (uint64_t) 0x13 * x113;
+ uint32_t x116 = (uint32_t) (x115 >> 0x1a);
+ uint32_t x117 = (uint32_t) x115 & 0x3ffffff;
+ uint32_t x118 = x116 + x90;
+ uint32_t x119 = x118 >> 0x19;
+ uint32_t x120 = x118 & 0x1ffffff;
+ return (Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, x119 + x93, Return x120, Return x117))
+(x, x0)%core
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/X25519/C32/femulDisplay.v b/src/Specific/X25519/C32/femulDisplay.v
new file mode 100644
index 000000000..e603a33c0
--- /dev/null
+++ b/src/Specific/X25519/C32/femulDisplay.v
@@ -0,0 +1,4 @@
+Require Import Crypto.Specific.X25519.C32.femul.
+Require Import Crypto.Specific.IntegrationTestDisplayCommon.
+
+Check display mul.
diff --git a/src/Specific/X25519/C32/fesquare.v b/src/Specific/X25519/C32/fesquare.v
new file mode 100644
index 000000000..00bf7c08b
--- /dev/null
+++ b/src/Specific/X25519/C32/fesquare.v
@@ -0,0 +1,100 @@
+Require Import Coq.ZArith.ZArith.
+Require Import Coq.Lists.List.
+Local Open Scope Z_scope.
+
+Require Import Crypto.Arithmetic.Core.
+Require Import Crypto.Util.FixedWordSizes.
+Require Import Crypto.Specific.ArithmeticSynthesisTest32.
+Require Import Crypto.Arithmetic.PrimeFieldTheorems.
+Require Import Crypto.Util.Tuple Crypto.Util.Sigma Crypto.Util.Sigma.MapProjections Crypto.Util.Sigma.Lift Crypto.Util.Notations Crypto.Util.ZRange Crypto.Util.BoundedWord.
+Require Import Crypto.Util.Tactics.Head.
+Require Import Crypto.Util.Tactics.MoveLetIn.
+Import ListNotations.
+
+Require Import Crypto.Specific.IntegrationTestTemporaryMiscCommon.
+
+Require Import Crypto.Compilers.Z.Bounds.Pipeline.
+
+Section BoundedField25p5.
+ Local Coercion Z.of_nat : nat >-> Z.
+
+ Let limb_widths := Eval vm_compute in (List.map (fun i => Z.log2 (wt (S i) / wt i)) (seq 0 sz)).
+ Let length_lw := Eval compute in List.length limb_widths.
+
+ Local Notation b_of exp := {| lower := 0 ; upper := 2^exp + 2^(exp-3) |}%Z (only parsing). (* max is [(0, 2^(exp+2) + 2^exp + 2^(exp-1) + 2^(exp-3) + 2^(exp-4) + 2^(exp-5) + 2^(exp-6) + 2^(exp-10) + 2^(exp-12) + 2^(exp-13) + 2^(exp-14) + 2^(exp-15) + 2^(exp-17) + 2^(exp-23) + 2^(exp-24))%Z] *)
+ (* The definition [bounds_exp] is a tuple-version of the
+ limb-widths, which are the [exp] argument in [b_of] above, i.e.,
+ the approximate base-2 exponent of the bounds on the limb in that
+ position. *)
+ Let bounds_exp : Tuple.tuple Z length_lw
+ := Eval compute in
+ Tuple.from_list length_lw limb_widths eq_refl.
+ Let bounds : Tuple.tuple zrange length_lw
+ := Eval compute in
+ Tuple.map (fun e => b_of e) bounds_exp.
+
+ Let lgbitwidth := Eval compute in (Z.to_nat (Z.log2_up (List.fold_right Z.max 0 limb_widths))).
+ Let bitwidth := Eval compute in (2^lgbitwidth)%nat.
+ Let feZ : Type := tuple Z sz.
+ Let feW : Type := tuple (wordT lgbitwidth) sz.
+ Let feBW : Type := BoundedWord sz bitwidth bounds.
+ Let phi : feBW -> F m :=
+ fun x => B.Positional.Fdecode wt (BoundedWordToZ _ _ _ x).
+
+ (* TODO : change this to field once field isomorphism happens *)
+ Definition square :
+ { square : feBW -> feBW
+ | forall a, phi (square a) = F.mul (phi a) (phi a) }.
+ Proof.
+ start_preglue.
+ do_rewrite_with_1sig_add_carry square_sig carry_sig; cbv_runtime.
+ all:fin_preglue.
+ (* jgross start here! *)
+ (*Set Ltac Profiling.*)
+ Time refine_reflectively32. (* Finished transaction in 19.348 secs (19.284u,0.036s) (successful) *)
+ (*Show Ltac Profile.*)
+ (* total time: 19.632s
+
+ tactic local total calls max
+────────────────────────────────────────┴──────┴──────┴───────┴─────────┘
+─refine_reflectively ------------------- 0.0% 98.4% 1 19.320s
+─ReflectiveTactics.do_reflective_pipelin -0.0% 96.2% 1 18.884s
+─ReflectiveTactics.solve_side_conditions 1.2% 96.1% 1 18.860s
+─ReflectiveTactics.do_reify ------------ 27.7% 44.0% 1 8.640s
+─ReflectiveTactics.abstract_vm_compute_r 12.3% 13.9% 2 2.024s
+─ReflectiveTactics.abstract_vm_compute_r 8.9% 12.2% 2 1.576s
+─Reify_rhs_gen ------------------------- 0.8% 11.7% 1 2.300s
+─ReflectiveTactics.renamify_rhs -------- 10.4% 11.5% 1 2.260s
+─ReflectiveTactics.abstract_rhs -------- 4.6% 5.8% 1 1.148s
+─clear (var_list) ---------------------- 5.2% 5.2% 57 0.184s
+─eexact -------------------------------- 4.1% 4.1% 68 0.028s
+─prove_interp_compile_correct ---------- 0.0% 3.4% 1 0.664s
+─ReflectiveTactics.abstract_cbv_interp_r 2.7% 3.3% 1 0.648s
+─unify (constr) (constr) --------------- 3.2% 3.2% 6 0.248s
+─rewrite ?EtaInterp.InterpExprEta ------ 3.1% 3.1% 1 0.612s
+─ReflectiveTactics.abstract_cbv_rhs ---- 2.0% 2.7% 1 0.532s
+─Glue.refine_to_reflective_glue -------- 0.0% 2.2% 1 0.436s
+─rewrite H ----------------------------- 2.1% 2.1% 1 0.420s
+
+ tactic local total calls max
+────────────────────────────────────────┴──────┴──────┴───────┴─────────┘
+─refine_reflectively ------------------- 0.0% 98.4% 1 19.320s
+ ├─ReflectiveTactics.do_reflective_pipel -0.0% 96.2% 1 18.884s
+ │└ReflectiveTactics.solve_side_conditio 1.2% 96.1% 1 18.860s
+ │ ├─ReflectiveTactics.do_reify -------- 27.7% 44.0% 1 8.640s
+ │ │ ├─Reify_rhs_gen ------------------- 0.8% 11.7% 1 2.300s
+ │ │ │ ├─prove_interp_compile_correct -- 0.0% 3.4% 1 0.664s
+ │ │ │ │└rewrite ?EtaInterp.InterpExprEt 3.1% 3.1% 1 0.612s
+ │ │ │ └─rewrite H --------------------- 2.1% 2.1% 1 0.420s
+ │ │ └─eexact -------------------------- 4.1% 4.1% 68 0.028s
+ │ ├─ReflectiveTactics.abstract_vm_compu 12.3% 13.9% 2 2.024s
+ │ ├─ReflectiveTactics.abstract_vm_compu 8.9% 12.2% 2 1.576s
+ │ ├─ReflectiveTactics.renamify_rhs ---- 10.4% 11.5% 1 2.260s
+ │ ├─ReflectiveTactics.abstract_rhs ---- 4.6% 5.8% 1 1.148s
+ │ ├─ReflectiveTactics.abstract_cbv_inte 2.7% 3.3% 1 0.648s
+ │ └─ReflectiveTactics.abstract_cbv_rhs 2.0% 2.7% 1 0.532s
+ └─Glue.refine_to_reflective_glue ------ 0.0% 2.2% 1 0.436s
+*)
+ Time Defined. (* Finished transaction in 10.167 secs (10.123u,0.023s) (successful) *)
+
+End BoundedField25p5.
diff --git a/src/Specific/X25519/C32/fesquareDisplay.log b/src/Specific/X25519/C32/fesquareDisplay.log
new file mode 100644
index 000000000..d9b0c533f
--- /dev/null
+++ b/src/Specific/X25519/C32/fesquareDisplay.log
@@ -0,0 +1,88 @@
+λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32,
+Interp-η
+(λ var : Syntax.base_type → Type,
+ λ '(x17, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core,
+ uint64_t x19 = (uint64_t) x2 * x2;
+ uint64_t x20 = (uint64_t) (0x2 * x2) * x4;
+ uint64_t x21 = 0x2 * ((uint64_t) x4 * x4 + (uint64_t) x2 * x6);
+ uint64_t x22 = 0x2 * ((uint64_t) x4 * x6 + (uint64_t) x2 * x8);
+ uint64_t x23 = (uint64_t) x6 * x6 + (uint64_t) (0x4 * x4) * x8 + (uint64_t) (0x2 * x2) * x10;
+ uint64_t x24 = 0x2 * ((uint64_t) x6 * x8 + (uint64_t) x4 * x10 + (uint64_t) x2 * x12);
+ uint64_t x25 = 0x2 * ((uint64_t) x8 * x8 + (uint64_t) x6 * x10 + (uint64_t) x2 * x14 + (uint64_t) (0x2 * x4) * x12);
+ uint64_t x26 = 0x2 * ((uint64_t) x8 * x10 + (uint64_t) x6 * x12 + (uint64_t) x4 * x14 + (uint64_t) x2 * x16);
+ uint64_t x27 = (uint64_t) x10 * x10 + 0x2 * ((uint64_t) x6 * x14 + (uint64_t) x2 * x18 + 0x2 * ((uint64_t) x4 * x16 + (uint64_t) x8 * x12));
+ uint64_t x28 = 0x2 * ((uint64_t) x10 * x12 + (uint64_t) x8 * x14 + (uint64_t) x6 * x16 + (uint64_t) x4 * x18 + (uint64_t) x2 * x17);
+ uint64_t x29 = 0x2 * ((uint64_t) x12 * x12 + (uint64_t) x10 * x14 + (uint64_t) x6 * x18 + 0x2 * ((uint64_t) x8 * x16 + (uint64_t) x4 * x17));
+ uint64_t x30 = 0x2 * ((uint64_t) x12 * x14 + (uint64_t) x10 * x16 + (uint64_t) x8 * x18 + (uint64_t) x6 * x17);
+ uint64_t x31 = (uint64_t) x14 * x14 + 0x2 * ((uint64_t) x10 * x18 + 0x2 * ((uint64_t) x12 * x16 + (uint64_t) x8 * x17));
+ uint64_t x32 = 0x2 * ((uint64_t) x14 * x16 + (uint64_t) x12 * x18 + (uint64_t) x10 * x17);
+ uint64_t x33 = 0x2 * ((uint64_t) x16 * x16 + (uint64_t) x14 * x18 + (uint64_t) (0x2 * x12) * x17);
+ uint64_t x34 = 0x2 * ((uint64_t) x16 * x18 + (uint64_t) x14 * x17);
+ uint64_t x35 = (uint64_t) x18 * x18 + (uint64_t) (0x4 * x16) * x17;
+ uint64_t x36 = (uint64_t) (0x2 * x18) * x17;
+ uint64_t x37 = (uint64_t) (0x2 * x17) * x17;
+ uint64_t x38 = x27 + x37 << 0x4;
+ uint64_t x39 = x38 + x37 << 0x1;
+ uint64_t x40 = x39 + x37;
+ uint64_t x41 = x26 + x36 << 0x4;
+ uint64_t x42 = x41 + x36 << 0x1;
+ uint64_t x43 = x42 + x36;
+ uint64_t x44 = x25 + x35 << 0x4;
+ uint64_t x45 = x44 + x35 << 0x1;
+ uint64_t x46 = x45 + x35;
+ uint64_t x47 = x24 + x34 << 0x4;
+ uint64_t x48 = x47 + x34 << 0x1;
+ uint64_t x49 = x48 + x34;
+ uint64_t x50 = x23 + x33 << 0x4;
+ uint64_t x51 = x50 + x33 << 0x1;
+ uint64_t x52 = x51 + x33;
+ uint64_t x53 = x22 + x32 << 0x4;
+ uint64_t x54 = x53 + x32 << 0x1;
+ uint64_t x55 = x54 + x32;
+ uint64_t x56 = x21 + x31 << 0x4;
+ uint64_t x57 = x56 + x31 << 0x1;
+ uint64_t x58 = x57 + x31;
+ uint64_t x59 = x20 + x30 << 0x4;
+ uint64_t x60 = x59 + x30 << 0x1;
+ uint64_t x61 = x60 + x30;
+ uint64_t x62 = x19 + x29 << 0x4;
+ uint64_t x63 = x62 + x29 << 0x1;
+ uint64_t x64 = x63 + x29;
+ uint64_t x65 = x64 >> 0x1a;
+ uint32_t x66 = (uint32_t) x64 & 0x3ffffff;
+ uint64_t x67 = x65 + x61;
+ uint64_t x68 = x67 >> 0x19;
+ uint32_t x69 = (uint32_t) x67 & 0x1ffffff;
+ uint64_t x70 = x68 + x58;
+ uint64_t x71 = x70 >> 0x1a;
+ uint32_t x72 = (uint32_t) x70 & 0x3ffffff;
+ uint64_t x73 = x71 + x55;
+ uint64_t x74 = x73 >> 0x19;
+ uint32_t x75 = (uint32_t) x73 & 0x1ffffff;
+ uint64_t x76 = x74 + x52;
+ uint64_t x77 = x76 >> 0x1a;
+ uint32_t x78 = (uint32_t) x76 & 0x3ffffff;
+ uint64_t x79 = x77 + x49;
+ uint64_t x80 = x79 >> 0x19;
+ uint32_t x81 = (uint32_t) x79 & 0x1ffffff;
+ uint64_t x82 = x80 + x46;
+ uint32_t x83 = (uint32_t) (x82 >> 0x1a);
+ uint32_t x84 = (uint32_t) x82 & 0x3ffffff;
+ uint64_t x85 = x83 + x43;
+ uint32_t x86 = (uint32_t) (x85 >> 0x19);
+ uint32_t x87 = (uint32_t) x85 & 0x1ffffff;
+ uint64_t x88 = x86 + x40;
+ uint32_t x89 = (uint32_t) (x88 >> 0x1a);
+ uint32_t x90 = (uint32_t) x88 & 0x3ffffff;
+ uint64_t x91 = x89 + x28;
+ uint32_t x92 = (uint32_t) (x91 >> 0x19);
+ uint32_t x93 = (uint32_t) x91 & 0x1ffffff;
+ uint64_t x94 = x66 + (uint64_t) 0x13 * x92;
+ uint32_t x95 = (uint32_t) (x94 >> 0x1a);
+ uint32_t x96 = (uint32_t) x94 & 0x3ffffff;
+ uint32_t x97 = x95 + x69;
+ uint32_t x98 = x97 >> 0x19;
+ uint32_t x99 = x97 & 0x1ffffff;
+ return (Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, x98 + x72, Return x99, Return x96))
+x
+ : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t)
diff --git a/src/Specific/X25519/C32/fesquareDisplay.v b/src/Specific/X25519/C32/fesquareDisplay.v
new file mode 100644
index 000000000..1cc917c7e
--- /dev/null
+++ b/src/Specific/X25519/C32/fesquareDisplay.v
@@ -0,0 +1,4 @@
+Require Import Crypto.Specific.X25519.C32.fesquare.
+Require Import Crypto.Specific.IntegrationTestDisplayCommon.
+
+Check display square.