From 3f87426dc1d3ed69f3cb327e41c4947b80b4c42c Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Thu, 21 Sep 2017 14:03:21 -0400 Subject: Add femul,fesqure for C32 32-bit ladderstep takes way too long (at least on Coq 8.6), so we don't add it yet --- .gitignore | 8 +++ Makefile | 21 ++++-- _CoqProject | 4 ++ extract-function-header.sh | 9 ++- extract-function.sh | 9 ++- src/Specific/X25519/C32/femul.v | 100 ++++++++++++++++++++++++++++ src/Specific/X25519/C32/femulDisplay.log | 88 ++++++++++++++++++++++++ src/Specific/X25519/C32/femulDisplay.v | 4 ++ src/Specific/X25519/C32/fesquare.v | 100 ++++++++++++++++++++++++++++ src/Specific/X25519/C32/fesquareDisplay.log | 88 ++++++++++++++++++++++++ src/Specific/X25519/C32/fesquareDisplay.v | 4 ++ 11 files changed, 423 insertions(+), 12 deletions(-) create mode 100644 src/Specific/X25519/C32/femul.v create mode 100644 src/Specific/X25519/C32/femulDisplay.log create mode 100644 src/Specific/X25519/C32/femulDisplay.v create mode 100644 src/Specific/X25519/C32/fesquare.v create mode 100644 src/Specific/X25519/C32/fesquareDisplay.log create mode 100644 src/Specific/X25519/C32/fesquareDisplay.v diff --git a/.gitignore b/.gitignore index ebdeb9128..e9cd586b6 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,14 @@ src/Specific/X25519/C64/ladderstep.c src/Specific/X25519/C64/ladderstep.h src/Specific/X25519/C64/measure src/Specific/X25519/C64/test +src/Specific/X25519/C32/femul.c +src/Specific/X25519/C32/femul.h +src/Specific/X25519/C32/fesquare.c +src/Specific/X25519/C32/fesquare.h +src/Specific/X25519/C32/ladderstep.c +src/Specific/X25519/C32/ladderstep.h +src/Specific/X25519/C32/measure +src/Specific/X25519/C32/test third_party/openssl-curve25519/measure third_party/openssl-nistp256c64/measure third_party/openssl-nistz256-adx/measure diff --git a/Makefile b/Makefile index 4a37bece3..3bf61d508 100644 --- a/Makefile +++ b/Makefile @@ -141,18 +141,27 @@ $(DISPLAY_NON_JAVA_VO:.vo=.log) : %Display.log : %.vo %Display.v src/Compilers/Z c: $(DISPLAY_NON_JAVA_VO:Display.vo=.c) $(DISPLAY_NON_JAVA_VO:Display.vo=.h) -$(DISPLAY_NON_JAVA_VO:Display.vo=.c) : %.c : %Display.log extract-function.sh - FIAT_CRYPTO_EXTRACT_FUNCTION_IS_ASM="" ./extract-function.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@ +DISPLAY_X25519_C64_VO := $(filter src/Specific/X25519/C64/%,$(DISPLAY_NON_JAVA_VO)) +DISPLAY_X25519_C32_VO := $(filter src/Specific/X25519/C32/%,$(DISPLAY_NON_JAVA_VO)) +DISPLAY_NON_JAVA_C32_VO := $(DISPLAY_X25519_C32_VO) +DISPLAY_NON_JAVA_C64_VO := $(filter-out $(DISPLAY_NON_JAVA_C32_VO),$(DISPLAY_NON_JAVA_VO)) + +$(DISPLAY_NON_JAVA_C64_VO:Display.vo=.c) : %.c : %Display.log extract-function.sh + BITWIDTH=64 FIAT_CRYPTO_EXTRACT_FUNCTION_IS_ASM="" ./extract-function.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@ + +$(DISPLAY_NON_JAVA_C32_VO:Display.vo=.c) : %.c : %Display.log extract-function.sh + BITWIDTH=32 FIAT_CRYPTO_EXTRACT_FUNCTION_IS_ASM="" ./extract-function.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@ -$(DISPLAY_NON_JAVA_VO:Display.vo=.h) : %.h : %Display.log extract-function-header.sh - ./extract-function-header.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@ +$(DISPLAY_NON_JAVA_C64_VO:Display.vo=.h) : %.h : %Display.log extract-function-header.sh + BITWIDTH=64 ./extract-function-header.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@ + +$(DISPLAY_NON_JAVA_C32_VO:Display.vo=.h) : %.h : %Display.log extract-function-header.sh + BITWIDTH=32 ./extract-function-header.sh $(patsubst %Display.log,%,$(notdir $<)) < $< > $@ $(DISPLAY_JAVA_VO:.vo=.log) : %JavaDisplay.log : %.vo %JavaDisplay.v src/Compilers/Z/JavaNotations.vo src/Specific/IntegrationTestDisplayCommon.vo $(SHOW)"COQC $*JavaDisplay > $@" $(HIDE)$(COQC) $(COQDEBUG) $(COQFLAGS) $*JavaDisplay.v | sed s'/\r\n/\n/g' > $@.tmp && mv -f $@.tmp $@ -DISPLAY_X25519_C64_VO := $(filter src/Specific/X25519/C64/%,$(DISPLAY_NON_JAVA_VO)) - src/Specific/X25519/C64/test: src/Specific/X25519/C64/compiler.sh src/Specific/X25519/x25519_test.c $(DISPLAY_X25519_C64_VO:Display.vo=.c) $(DISPLAY_X25519_C64_VO:Display.vo=.h) src/Specific/X25519/C64/scalarmult.c src/Specific/X25519/C64/compiler.sh -o src/Specific/X25519/C64/test -I liblow -I src/Specific/X25519/C64/ src/Specific/X25519/x25519_test.c $(DISPLAY_X25519_C64_VO:Display.vo=.c) src/Specific/X25519/C64/scalarmult.c diff --git a/_CoqProject b/_CoqProject index a041542cf..1475ca1d9 100644 --- a/_CoqProject +++ b/_CoqProject @@ -261,6 +261,10 @@ src/Specific/NISTP256/AMD64/fesubDisplay.v src/Specific/NISTP256/FancyMachine256/Barrett.v src/Specific/NISTP256/FancyMachine256/Core.v src/Specific/NISTP256/FancyMachine256/Montgomery.v +src/Specific/X25519/C32/femul.v +src/Specific/X25519/C32/femulDisplay.v +src/Specific/X25519/C32/fesquare.v +src/Specific/X25519/C32/fesquareDisplay.v src/Specific/X25519/C64/femul.v src/Specific/X25519/C64/femulDisplay.v src/Specific/X25519/C64/fesquare.v diff --git a/extract-function-header.sh b/extract-function-header.sh index 3535e3c37..36676db06 100755 --- a/extract-function-header.sh +++ b/extract-function-header.sh @@ -21,18 +21,21 @@ cat <<"EOF" EOF +if [ -z "${BITWIDTH}" ]; then + BITWIDTH=64 +fi + while IFS= read -r line; do case "$line" in *"λ '"*) echo -n "void force_inline $funcname(" - echo -n "uint64_t* out" + echo -n "uint${BITWIDTH}_t* out" echo "$line" | grep -owP -- '\w+\d+' | \ while IFS= read -r arg; do - echo -n ", uint64_t $arg" + echo -n ", uint${BITWIDTH}_t $arg" done echo ');' break ;; esac done - diff --git a/extract-function.sh b/extract-function.sh index 8653c4b32..024dda40f 100755 --- a/extract-function.sh +++ b/extract-function.sh @@ -44,14 +44,17 @@ if [ ! -z "${FIAT_CRYPTO_EXTRACT_FUNCTION_IS_ASM}" ]; then brace='' close_brace='' fi +if [ -z "${BITWIDTH}" ]; then + BITWIDTH=64 +fi while IFS= read -r line; do case "$line" in *"λ '"*) echo -n "void force_inline $funcname(" - echo -n "uint64_t* out" + echo -n "uint${BITWIDTH}_t* out" echo "$line" | grep -owP -- '\w+\d+' | \ while IFS= read -r arg; do - echo -n ", uint64_t $arg" + echo -n ", uint${BITWIDTH}_t $arg" done echo -n ')' echo "${function_open_brace}" @@ -69,7 +72,7 @@ while IFS= read -r line; do echo -n "${close_brace}" done echo "}" - echo "// caller: uint64_t out[$i];" ) + echo "// caller: uint${BITWIDTH}_t out[$i];" ) show=false break ;; diff --git a/src/Specific/X25519/C32/femul.v b/src/Specific/X25519/C32/femul.v new file mode 100644 index 000000000..19959f19e --- /dev/null +++ b/src/Specific/X25519/C32/femul.v @@ -0,0 +1,100 @@ +Require Import Coq.ZArith.ZArith. +Require Import Coq.Lists.List. +Local Open Scope Z_scope. + +Require Import Crypto.Arithmetic.Core. +Require Import Crypto.Util.FixedWordSizes. +Require Import Crypto.Specific.ArithmeticSynthesisTest32. +Require Import Crypto.Arithmetic.PrimeFieldTheorems. +Require Import Crypto.Util.Tuple Crypto.Util.Sigma Crypto.Util.Sigma.MapProjections Crypto.Util.Sigma.Lift Crypto.Util.Notations Crypto.Util.ZRange Crypto.Util.BoundedWord. +Require Import Crypto.Util.Tactics.Head. +Require Import Crypto.Util.Tactics.MoveLetIn. +Import ListNotations. + +Require Import Crypto.Specific.IntegrationTestTemporaryMiscCommon. + +Require Import Crypto.Compilers.Z.Bounds.Pipeline. + +Section BoundedField25p5. + Local Coercion Z.of_nat : nat >-> Z. + + Let limb_widths := Eval vm_compute in (List.map (fun i => Z.log2 (wt (S i) / wt i)) (seq 0 sz)). + Let length_lw := Eval compute in List.length limb_widths. + + Local Notation b_of exp := {| lower := 0 ; upper := 2^exp + 2^(exp-3) |}%Z (only parsing). (* max is [(0, 2^(exp+2) + 2^exp + 2^(exp-1) + 2^(exp-3) + 2^(exp-4) + 2^(exp-5) + 2^(exp-6) + 2^(exp-10) + 2^(exp-12) + 2^(exp-13) + 2^(exp-14) + 2^(exp-15) + 2^(exp-17) + 2^(exp-23) + 2^(exp-24))%Z] *) + (* The definition [bounds_exp] is a tuple-version of the + limb-widths, which are the [exp] argument in [b_of] above, i.e., + the approximate base-2 exponent of the bounds on the limb in that + position. *) + Let bounds_exp : Tuple.tuple Z length_lw + := Eval compute in + Tuple.from_list length_lw limb_widths eq_refl. + Let bounds : Tuple.tuple zrange length_lw + := Eval compute in + Tuple.map (fun e => b_of e) bounds_exp. + + Let lgbitwidth := Eval compute in (Z.to_nat (Z.log2_up (List.fold_right Z.max 0 limb_widths))). + Let bitwidth := Eval compute in (2^lgbitwidth)%nat. + Let feZ : Type := tuple Z sz. + Let feW : Type := tuple (wordT lgbitwidth) sz. + Let feBW : Type := BoundedWord sz bitwidth bounds. + Let phi : feBW -> F m := + fun x => B.Positional.Fdecode wt (BoundedWordToZ _ _ _ x). + + (* TODO : change this to field once field isomorphism happens *) + Definition mul : + { mul : feBW -> feBW -> feBW + | forall a b, phi (mul a b) = F.mul (phi a) (phi b) }. + Proof. + start_preglue. + do_rewrite_with_2sig_add_carry mul_sig carry_sig; cbv_runtime. + all:fin_preglue. + (* jgross start here! *) + (*Set Ltac Profiling.*) + Time refine_reflectively32. (* Finished transaction in 19.348 secs (19.284u,0.036s) (successful) *) + (*Show Ltac Profile.*) + (* total time: 19.632s + + tactic local total calls max +────────────────────────────────────────┴──────┴──────┴───────┴─────────┘ +─refine_reflectively ------------------- 0.0% 98.4% 1 19.320s +─ReflectiveTactics.do_reflective_pipelin -0.0% 96.2% 1 18.884s +─ReflectiveTactics.solve_side_conditions 1.2% 96.1% 1 18.860s +─ReflectiveTactics.do_reify ------------ 27.7% 44.0% 1 8.640s +─ReflectiveTactics.abstract_vm_compute_r 12.3% 13.9% 2 2.024s +─ReflectiveTactics.abstract_vm_compute_r 8.9% 12.2% 2 1.576s +─Reify_rhs_gen ------------------------- 0.8% 11.7% 1 2.300s +─ReflectiveTactics.renamify_rhs -------- 10.4% 11.5% 1 2.260s +─ReflectiveTactics.abstract_rhs -------- 4.6% 5.8% 1 1.148s +─clear (var_list) ---------------------- 5.2% 5.2% 57 0.184s +─eexact -------------------------------- 4.1% 4.1% 68 0.028s +─prove_interp_compile_correct ---------- 0.0% 3.4% 1 0.664s +─ReflectiveTactics.abstract_cbv_interp_r 2.7% 3.3% 1 0.648s +─unify (constr) (constr) --------------- 3.2% 3.2% 6 0.248s +─rewrite ?EtaInterp.InterpExprEta ------ 3.1% 3.1% 1 0.612s +─ReflectiveTactics.abstract_cbv_rhs ---- 2.0% 2.7% 1 0.532s +─Glue.refine_to_reflective_glue -------- 0.0% 2.2% 1 0.436s +─rewrite H ----------------------------- 2.1% 2.1% 1 0.420s + + tactic local total calls max +────────────────────────────────────────┴──────┴──────┴───────┴─────────┘ +─refine_reflectively ------------------- 0.0% 98.4% 1 19.320s + ├─ReflectiveTactics.do_reflective_pipel -0.0% 96.2% 1 18.884s + │└ReflectiveTactics.solve_side_conditio 1.2% 96.1% 1 18.860s + │ ├─ReflectiveTactics.do_reify -------- 27.7% 44.0% 1 8.640s + │ │ ├─Reify_rhs_gen ------------------- 0.8% 11.7% 1 2.300s + │ │ │ ├─prove_interp_compile_correct -- 0.0% 3.4% 1 0.664s + │ │ │ │└rewrite ?EtaInterp.InterpExprEt 3.1% 3.1% 1 0.612s + │ │ │ └─rewrite H --------------------- 2.1% 2.1% 1 0.420s + │ │ └─eexact -------------------------- 4.1% 4.1% 68 0.028s + │ ├─ReflectiveTactics.abstract_vm_compu 12.3% 13.9% 2 2.024s + │ ├─ReflectiveTactics.abstract_vm_compu 8.9% 12.2% 2 1.576s + │ ├─ReflectiveTactics.renamify_rhs ---- 10.4% 11.5% 1 2.260s + │ ├─ReflectiveTactics.abstract_rhs ---- 4.6% 5.8% 1 1.148s + │ ├─ReflectiveTactics.abstract_cbv_inte 2.7% 3.3% 1 0.648s + │ └─ReflectiveTactics.abstract_cbv_rhs 2.0% 2.7% 1 0.532s + └─Glue.refine_to_reflective_glue ------ 0.0% 2.2% 1 0.436s +*) + Time Defined. (* Finished transaction in 10.167 secs (10.123u,0.023s) (successful) *) + +End BoundedField25p5. diff --git a/src/Specific/X25519/C32/femulDisplay.log b/src/Specific/X25519/C32/femulDisplay.log new file mode 100644 index 000000000..12fa54d8a --- /dev/null +++ b/src/Specific/X25519/C32/femulDisplay.log @@ -0,0 +1,88 @@ +λ x x0 : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32, +Interp-η +(λ var : Syntax.base_type → Type, + λ '(x20, x21, x19, x17, x15, x13, x11, x9, x7, x5, (x38, x39, x37, x35, x33, x31, x29, x27, x25, x23))%core, + uint64_t x40 = (uint64_t) x23 * x5; + uint64_t x41 = (uint64_t) x23 * x7 + (uint64_t) x25 * x5; + uint64_t x42 = (uint64_t) (0x2 * x25) * x7 + (uint64_t) x23 * x9 + (uint64_t) x27 * x5; + uint64_t x43 = (uint64_t) x25 * x9 + (uint64_t) x27 * x7 + (uint64_t) x23 * x11 + (uint64_t) x29 * x5; + uint64_t x44 = (uint64_t) x27 * x9 + 0x2 * ((uint64_t) x25 * x11 + (uint64_t) x29 * x7) + (uint64_t) x23 * x13 + (uint64_t) x31 * x5; + uint64_t x45 = (uint64_t) x27 * x11 + (uint64_t) x29 * x9 + (uint64_t) x25 * x13 + (uint64_t) x31 * x7 + (uint64_t) x23 * x15 + (uint64_t) x33 * x5; + uint64_t x46 = 0x2 * ((uint64_t) x29 * x11 + (uint64_t) x25 * x15 + (uint64_t) x33 * x7) + (uint64_t) x27 * x13 + (uint64_t) x31 * x9 + (uint64_t) x23 * x17 + (uint64_t) x35 * x5; + uint64_t x47 = (uint64_t) x29 * x13 + (uint64_t) x31 * x11 + (uint64_t) x27 * x15 + (uint64_t) x33 * x9 + (uint64_t) x25 * x17 + (uint64_t) x35 * x7 + (uint64_t) x23 * x19 + (uint64_t) x37 * x5; + uint64_t x48 = (uint64_t) x31 * x13 + 0x2 * ((uint64_t) x29 * x15 + (uint64_t) x33 * x11 + (uint64_t) x25 * x19 + (uint64_t) x37 * x7) + (uint64_t) x27 * x17 + (uint64_t) x35 * x9 + (uint64_t) x23 * x21 + (uint64_t) x39 * x5; + uint64_t x49 = (uint64_t) x31 * x15 + (uint64_t) x33 * x13 + (uint64_t) x29 * x17 + (uint64_t) x35 * x11 + (uint64_t) x27 * x19 + (uint64_t) x37 * x9 + (uint64_t) x25 * x21 + (uint64_t) x39 * x7 + (uint64_t) x23 * x20 + (uint64_t) x38 * x5; + uint64_t x50 = 0x2 * ((uint64_t) x33 * x15 + (uint64_t) x29 * x19 + (uint64_t) x37 * x11 + (uint64_t) x25 * x20 + (uint64_t) x38 * x7) + (uint64_t) x31 * x17 + (uint64_t) x35 * x13 + (uint64_t) x27 * x21 + (uint64_t) x39 * x9; + uint64_t x51 = (uint64_t) x33 * x17 + (uint64_t) x35 * x15 + (uint64_t) x31 * x19 + (uint64_t) x37 * x13 + (uint64_t) x29 * x21 + (uint64_t) x39 * x11 + (uint64_t) x27 * x20 + (uint64_t) x38 * x9; + uint64_t x52 = (uint64_t) x35 * x17 + 0x2 * ((uint64_t) x33 * x19 + (uint64_t) x37 * x15 + (uint64_t) x29 * x20 + (uint64_t) x38 * x11) + (uint64_t) x31 * x21 + (uint64_t) x39 * x13; + uint64_t x53 = (uint64_t) x35 * x19 + (uint64_t) x37 * x17 + (uint64_t) x33 * x21 + (uint64_t) x39 * x15 + (uint64_t) x31 * x20 + (uint64_t) x38 * x13; + uint64_t x54 = 0x2 * ((uint64_t) x37 * x19 + (uint64_t) x33 * x20 + (uint64_t) x38 * x15) + (uint64_t) x35 * x21 + (uint64_t) x39 * x17; + uint64_t x55 = (uint64_t) x37 * x21 + (uint64_t) x39 * x19 + (uint64_t) x35 * x20 + (uint64_t) x38 * x17; + uint64_t x56 = (uint64_t) x39 * x21 + 0x2 * ((uint64_t) x37 * x20 + (uint64_t) x38 * x19); + uint64_t x57 = (uint64_t) x39 * x20 + (uint64_t) x38 * x21; + uint64_t x58 = (uint64_t) (0x2 * x38) * x20; + uint64_t x59 = x48 + x58 << 0x4; + uint64_t x60 = x59 + x58 << 0x1; + uint64_t x61 = x60 + x58; + uint64_t x62 = x47 + x57 << 0x4; + uint64_t x63 = x62 + x57 << 0x1; + uint64_t x64 = x63 + x57; + uint64_t x65 = x46 + x56 << 0x4; + uint64_t x66 = x65 + x56 << 0x1; + uint64_t x67 = x66 + x56; + uint64_t x68 = x45 + x55 << 0x4; + uint64_t x69 = x68 + x55 << 0x1; + uint64_t x70 = x69 + x55; + uint64_t x71 = x44 + x54 << 0x4; + uint64_t x72 = x71 + x54 << 0x1; + uint64_t x73 = x72 + x54; + uint64_t x74 = x43 + x53 << 0x4; + uint64_t x75 = x74 + x53 << 0x1; + uint64_t x76 = x75 + x53; + uint64_t x77 = x42 + x52 << 0x4; + uint64_t x78 = x77 + x52 << 0x1; + uint64_t x79 = x78 + x52; + uint64_t x80 = x41 + x51 << 0x4; + uint64_t x81 = x80 + x51 << 0x1; + uint64_t x82 = x81 + x51; + uint64_t x83 = x40 + x50 << 0x4; + uint64_t x84 = x83 + x50 << 0x1; + uint64_t x85 = x84 + x50; + uint64_t x86 = x85 >> 0x1a; + uint32_t x87 = (uint32_t) x85 & 0x3ffffff; + uint64_t x88 = x86 + x82; + uint64_t x89 = x88 >> 0x19; + uint32_t x90 = (uint32_t) x88 & 0x1ffffff; + uint64_t x91 = x89 + x79; + uint64_t x92 = x91 >> 0x1a; + uint32_t x93 = (uint32_t) x91 & 0x3ffffff; + uint64_t x94 = x92 + x76; + uint64_t x95 = x94 >> 0x19; + uint32_t x96 = (uint32_t) x94 & 0x1ffffff; + uint64_t x97 = x95 + x73; + uint64_t x98 = x97 >> 0x1a; + uint32_t x99 = (uint32_t) x97 & 0x3ffffff; + uint64_t x100 = x98 + x70; + uint64_t x101 = x100 >> 0x19; + uint32_t x102 = (uint32_t) x100 & 0x1ffffff; + uint64_t x103 = x101 + x67; + uint32_t x104 = (uint32_t) (x103 >> 0x1a); + uint32_t x105 = (uint32_t) x103 & 0x3ffffff; + uint64_t x106 = x104 + x64; + uint32_t x107 = (uint32_t) (x106 >> 0x19); + uint32_t x108 = (uint32_t) x106 & 0x1ffffff; + uint64_t x109 = x107 + x61; + uint32_t x110 = (uint32_t) (x109 >> 0x1a); + uint32_t x111 = (uint32_t) x109 & 0x3ffffff; + uint64_t x112 = x110 + x49; + uint32_t x113 = (uint32_t) (x112 >> 0x19); + uint32_t x114 = (uint32_t) x112 & 0x1ffffff; + uint64_t x115 = x87 + (uint64_t) 0x13 * x113; + uint32_t x116 = (uint32_t) (x115 >> 0x1a); + uint32_t x117 = (uint32_t) x115 & 0x3ffffff; + uint32_t x118 = x116 + x90; + uint32_t x119 = x118 >> 0x19; + uint32_t x120 = x118 & 0x1ffffff; + return (Return x114, Return x111, Return x108, Return x105, Return x102, Return x99, Return x96, x119 + x93, Return x120, Return x117)) +(x, x0)%core + : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t) diff --git a/src/Specific/X25519/C32/femulDisplay.v b/src/Specific/X25519/C32/femulDisplay.v new file mode 100644 index 000000000..e603a33c0 --- /dev/null +++ b/src/Specific/X25519/C32/femulDisplay.v @@ -0,0 +1,4 @@ +Require Import Crypto.Specific.X25519.C32.femul. +Require Import Crypto.Specific.IntegrationTestDisplayCommon. + +Check display mul. diff --git a/src/Specific/X25519/C32/fesquare.v b/src/Specific/X25519/C32/fesquare.v new file mode 100644 index 000000000..00bf7c08b --- /dev/null +++ b/src/Specific/X25519/C32/fesquare.v @@ -0,0 +1,100 @@ +Require Import Coq.ZArith.ZArith. +Require Import Coq.Lists.List. +Local Open Scope Z_scope. + +Require Import Crypto.Arithmetic.Core. +Require Import Crypto.Util.FixedWordSizes. +Require Import Crypto.Specific.ArithmeticSynthesisTest32. +Require Import Crypto.Arithmetic.PrimeFieldTheorems. +Require Import Crypto.Util.Tuple Crypto.Util.Sigma Crypto.Util.Sigma.MapProjections Crypto.Util.Sigma.Lift Crypto.Util.Notations Crypto.Util.ZRange Crypto.Util.BoundedWord. +Require Import Crypto.Util.Tactics.Head. +Require Import Crypto.Util.Tactics.MoveLetIn. +Import ListNotations. + +Require Import Crypto.Specific.IntegrationTestTemporaryMiscCommon. + +Require Import Crypto.Compilers.Z.Bounds.Pipeline. + +Section BoundedField25p5. + Local Coercion Z.of_nat : nat >-> Z. + + Let limb_widths := Eval vm_compute in (List.map (fun i => Z.log2 (wt (S i) / wt i)) (seq 0 sz)). + Let length_lw := Eval compute in List.length limb_widths. + + Local Notation b_of exp := {| lower := 0 ; upper := 2^exp + 2^(exp-3) |}%Z (only parsing). (* max is [(0, 2^(exp+2) + 2^exp + 2^(exp-1) + 2^(exp-3) + 2^(exp-4) + 2^(exp-5) + 2^(exp-6) + 2^(exp-10) + 2^(exp-12) + 2^(exp-13) + 2^(exp-14) + 2^(exp-15) + 2^(exp-17) + 2^(exp-23) + 2^(exp-24))%Z] *) + (* The definition [bounds_exp] is a tuple-version of the + limb-widths, which are the [exp] argument in [b_of] above, i.e., + the approximate base-2 exponent of the bounds on the limb in that + position. *) + Let bounds_exp : Tuple.tuple Z length_lw + := Eval compute in + Tuple.from_list length_lw limb_widths eq_refl. + Let bounds : Tuple.tuple zrange length_lw + := Eval compute in + Tuple.map (fun e => b_of e) bounds_exp. + + Let lgbitwidth := Eval compute in (Z.to_nat (Z.log2_up (List.fold_right Z.max 0 limb_widths))). + Let bitwidth := Eval compute in (2^lgbitwidth)%nat. + Let feZ : Type := tuple Z sz. + Let feW : Type := tuple (wordT lgbitwidth) sz. + Let feBW : Type := BoundedWord sz bitwidth bounds. + Let phi : feBW -> F m := + fun x => B.Positional.Fdecode wt (BoundedWordToZ _ _ _ x). + + (* TODO : change this to field once field isomorphism happens *) + Definition square : + { square : feBW -> feBW + | forall a, phi (square a) = F.mul (phi a) (phi a) }. + Proof. + start_preglue. + do_rewrite_with_1sig_add_carry square_sig carry_sig; cbv_runtime. + all:fin_preglue. + (* jgross start here! *) + (*Set Ltac Profiling.*) + Time refine_reflectively32. (* Finished transaction in 19.348 secs (19.284u,0.036s) (successful) *) + (*Show Ltac Profile.*) + (* total time: 19.632s + + tactic local total calls max +────────────────────────────────────────┴──────┴──────┴───────┴─────────┘ +─refine_reflectively ------------------- 0.0% 98.4% 1 19.320s +─ReflectiveTactics.do_reflective_pipelin -0.0% 96.2% 1 18.884s +─ReflectiveTactics.solve_side_conditions 1.2% 96.1% 1 18.860s +─ReflectiveTactics.do_reify ------------ 27.7% 44.0% 1 8.640s +─ReflectiveTactics.abstract_vm_compute_r 12.3% 13.9% 2 2.024s +─ReflectiveTactics.abstract_vm_compute_r 8.9% 12.2% 2 1.576s +─Reify_rhs_gen ------------------------- 0.8% 11.7% 1 2.300s +─ReflectiveTactics.renamify_rhs -------- 10.4% 11.5% 1 2.260s +─ReflectiveTactics.abstract_rhs -------- 4.6% 5.8% 1 1.148s +─clear (var_list) ---------------------- 5.2% 5.2% 57 0.184s +─eexact -------------------------------- 4.1% 4.1% 68 0.028s +─prove_interp_compile_correct ---------- 0.0% 3.4% 1 0.664s +─ReflectiveTactics.abstract_cbv_interp_r 2.7% 3.3% 1 0.648s +─unify (constr) (constr) --------------- 3.2% 3.2% 6 0.248s +─rewrite ?EtaInterp.InterpExprEta ------ 3.1% 3.1% 1 0.612s +─ReflectiveTactics.abstract_cbv_rhs ---- 2.0% 2.7% 1 0.532s +─Glue.refine_to_reflective_glue -------- 0.0% 2.2% 1 0.436s +─rewrite H ----------------------------- 2.1% 2.1% 1 0.420s + + tactic local total calls max +────────────────────────────────────────┴──────┴──────┴───────┴─────────┘ +─refine_reflectively ------------------- 0.0% 98.4% 1 19.320s + ├─ReflectiveTactics.do_reflective_pipel -0.0% 96.2% 1 18.884s + │└ReflectiveTactics.solve_side_conditio 1.2% 96.1% 1 18.860s + │ ├─ReflectiveTactics.do_reify -------- 27.7% 44.0% 1 8.640s + │ │ ├─Reify_rhs_gen ------------------- 0.8% 11.7% 1 2.300s + │ │ │ ├─prove_interp_compile_correct -- 0.0% 3.4% 1 0.664s + │ │ │ │└rewrite ?EtaInterp.InterpExprEt 3.1% 3.1% 1 0.612s + │ │ │ └─rewrite H --------------------- 2.1% 2.1% 1 0.420s + │ │ └─eexact -------------------------- 4.1% 4.1% 68 0.028s + │ ├─ReflectiveTactics.abstract_vm_compu 12.3% 13.9% 2 2.024s + │ ├─ReflectiveTactics.abstract_vm_compu 8.9% 12.2% 2 1.576s + │ ├─ReflectiveTactics.renamify_rhs ---- 10.4% 11.5% 1 2.260s + │ ├─ReflectiveTactics.abstract_rhs ---- 4.6% 5.8% 1 1.148s + │ ├─ReflectiveTactics.abstract_cbv_inte 2.7% 3.3% 1 0.648s + │ └─ReflectiveTactics.abstract_cbv_rhs 2.0% 2.7% 1 0.532s + └─Glue.refine_to_reflective_glue ------ 0.0% 2.2% 1 0.436s +*) + Time Defined. (* Finished transaction in 10.167 secs (10.123u,0.023s) (successful) *) + +End BoundedField25p5. diff --git a/src/Specific/X25519/C32/fesquareDisplay.log b/src/Specific/X25519/C32/fesquareDisplay.log new file mode 100644 index 000000000..d9b0c533f --- /dev/null +++ b/src/Specific/X25519/C32/fesquareDisplay.log @@ -0,0 +1,88 @@ +λ x : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32, +Interp-η +(λ var : Syntax.base_type → Type, + λ '(x17, x18, x16, x14, x12, x10, x8, x6, x4, x2)%core, + uint64_t x19 = (uint64_t) x2 * x2; + uint64_t x20 = (uint64_t) (0x2 * x2) * x4; + uint64_t x21 = 0x2 * ((uint64_t) x4 * x4 + (uint64_t) x2 * x6); + uint64_t x22 = 0x2 * ((uint64_t) x4 * x6 + (uint64_t) x2 * x8); + uint64_t x23 = (uint64_t) x6 * x6 + (uint64_t) (0x4 * x4) * x8 + (uint64_t) (0x2 * x2) * x10; + uint64_t x24 = 0x2 * ((uint64_t) x6 * x8 + (uint64_t) x4 * x10 + (uint64_t) x2 * x12); + uint64_t x25 = 0x2 * ((uint64_t) x8 * x8 + (uint64_t) x6 * x10 + (uint64_t) x2 * x14 + (uint64_t) (0x2 * x4) * x12); + uint64_t x26 = 0x2 * ((uint64_t) x8 * x10 + (uint64_t) x6 * x12 + (uint64_t) x4 * x14 + (uint64_t) x2 * x16); + uint64_t x27 = (uint64_t) x10 * x10 + 0x2 * ((uint64_t) x6 * x14 + (uint64_t) x2 * x18 + 0x2 * ((uint64_t) x4 * x16 + (uint64_t) x8 * x12)); + uint64_t x28 = 0x2 * ((uint64_t) x10 * x12 + (uint64_t) x8 * x14 + (uint64_t) x6 * x16 + (uint64_t) x4 * x18 + (uint64_t) x2 * x17); + uint64_t x29 = 0x2 * ((uint64_t) x12 * x12 + (uint64_t) x10 * x14 + (uint64_t) x6 * x18 + 0x2 * ((uint64_t) x8 * x16 + (uint64_t) x4 * x17)); + uint64_t x30 = 0x2 * ((uint64_t) x12 * x14 + (uint64_t) x10 * x16 + (uint64_t) x8 * x18 + (uint64_t) x6 * x17); + uint64_t x31 = (uint64_t) x14 * x14 + 0x2 * ((uint64_t) x10 * x18 + 0x2 * ((uint64_t) x12 * x16 + (uint64_t) x8 * x17)); + uint64_t x32 = 0x2 * ((uint64_t) x14 * x16 + (uint64_t) x12 * x18 + (uint64_t) x10 * x17); + uint64_t x33 = 0x2 * ((uint64_t) x16 * x16 + (uint64_t) x14 * x18 + (uint64_t) (0x2 * x12) * x17); + uint64_t x34 = 0x2 * ((uint64_t) x16 * x18 + (uint64_t) x14 * x17); + uint64_t x35 = (uint64_t) x18 * x18 + (uint64_t) (0x4 * x16) * x17; + uint64_t x36 = (uint64_t) (0x2 * x18) * x17; + uint64_t x37 = (uint64_t) (0x2 * x17) * x17; + uint64_t x38 = x27 + x37 << 0x4; + uint64_t x39 = x38 + x37 << 0x1; + uint64_t x40 = x39 + x37; + uint64_t x41 = x26 + x36 << 0x4; + uint64_t x42 = x41 + x36 << 0x1; + uint64_t x43 = x42 + x36; + uint64_t x44 = x25 + x35 << 0x4; + uint64_t x45 = x44 + x35 << 0x1; + uint64_t x46 = x45 + x35; + uint64_t x47 = x24 + x34 << 0x4; + uint64_t x48 = x47 + x34 << 0x1; + uint64_t x49 = x48 + x34; + uint64_t x50 = x23 + x33 << 0x4; + uint64_t x51 = x50 + x33 << 0x1; + uint64_t x52 = x51 + x33; + uint64_t x53 = x22 + x32 << 0x4; + uint64_t x54 = x53 + x32 << 0x1; + uint64_t x55 = x54 + x32; + uint64_t x56 = x21 + x31 << 0x4; + uint64_t x57 = x56 + x31 << 0x1; + uint64_t x58 = x57 + x31; + uint64_t x59 = x20 + x30 << 0x4; + uint64_t x60 = x59 + x30 << 0x1; + uint64_t x61 = x60 + x30; + uint64_t x62 = x19 + x29 << 0x4; + uint64_t x63 = x62 + x29 << 0x1; + uint64_t x64 = x63 + x29; + uint64_t x65 = x64 >> 0x1a; + uint32_t x66 = (uint32_t) x64 & 0x3ffffff; + uint64_t x67 = x65 + x61; + uint64_t x68 = x67 >> 0x19; + uint32_t x69 = (uint32_t) x67 & 0x1ffffff; + uint64_t x70 = x68 + x58; + uint64_t x71 = x70 >> 0x1a; + uint32_t x72 = (uint32_t) x70 & 0x3ffffff; + uint64_t x73 = x71 + x55; + uint64_t x74 = x73 >> 0x19; + uint32_t x75 = (uint32_t) x73 & 0x1ffffff; + uint64_t x76 = x74 + x52; + uint64_t x77 = x76 >> 0x1a; + uint32_t x78 = (uint32_t) x76 & 0x3ffffff; + uint64_t x79 = x77 + x49; + uint64_t x80 = x79 >> 0x19; + uint32_t x81 = (uint32_t) x79 & 0x1ffffff; + uint64_t x82 = x80 + x46; + uint32_t x83 = (uint32_t) (x82 >> 0x1a); + uint32_t x84 = (uint32_t) x82 & 0x3ffffff; + uint64_t x85 = x83 + x43; + uint32_t x86 = (uint32_t) (x85 >> 0x19); + uint32_t x87 = (uint32_t) x85 & 0x1ffffff; + uint64_t x88 = x86 + x40; + uint32_t x89 = (uint32_t) (x88 >> 0x1a); + uint32_t x90 = (uint32_t) x88 & 0x3ffffff; + uint64_t x91 = x89 + x28; + uint32_t x92 = (uint32_t) (x91 >> 0x19); + uint32_t x93 = (uint32_t) x91 & 0x1ffffff; + uint64_t x94 = x66 + (uint64_t) 0x13 * x92; + uint32_t x95 = (uint32_t) (x94 >> 0x1a); + uint32_t x96 = (uint32_t) x94 & 0x3ffffff; + uint32_t x97 = x95 + x69; + uint32_t x98 = x97 >> 0x19; + uint32_t x99 = x97 & 0x1ffffff; + return (Return x93, Return x90, Return x87, Return x84, Return x81, Return x78, Return x75, x98 + x72, Return x99, Return x96)) +x + : word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 * word32 → ReturnType (uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t * uint32_t) diff --git a/src/Specific/X25519/C32/fesquareDisplay.v b/src/Specific/X25519/C32/fesquareDisplay.v new file mode 100644 index 000000000..1cc917c7e --- /dev/null +++ b/src/Specific/X25519/C32/fesquareDisplay.v @@ -0,0 +1,4 @@ +Require Import Crypto.Specific.X25519.C32.fesquare. +Require Import Crypto.Specific.IntegrationTestDisplayCommon. + +Check display square. -- cgit v1.2.3