aboutsummaryrefslogtreecommitdiff
path: root/src/Arithmetic/MontgomeryReduction
diff options
context:
space:
mode:
authorGravatar Jason Gross <jgross@mit.edu>2017-10-19 14:44:48 -0400
committerGravatar Jason Gross <jgross@mit.edu>2017-10-19 15:40:23 -0400
commit7e939cd63236d0a6a492ddff5015daf3f706a3bc (patch)
treefa19e772dc624eb7899017b55e527de184e7bf8f /src/Arithmetic/MontgomeryReduction
parent79b586e4589f56d081301de92b305569c1077ed2 (diff)
Switch arithmetic to cps for Z * Z under the hood
This is in preparation for writing a ~compiler for the arithmetic things to expression trees. I'm not sure what's up with femul in the table below; I ran it again and got: After: src/Specific/NISTP256/AMD64/femul (real: 115.70, user: 115.25, sys: 0.44, mem: 3571448 ko) Before: src/Specific/NISTP256/AMD64/femul (real: 118.49, user: 117.99, sys: 0.43, mem: 3581612 ko) After | File Name | Before || Change --------------------------------------------------------------------------------------------- 17m02.82s | Total | 16m36.20s || +0m26.61s --------------------------------------------------------------------------------------------- 2m27.04s | Specific/NISTP256/AMD64/femul | 2m04.60s || +0m22.43s 1m38.55s | Specific/X2448/Karatsuba/C64/femul | 1m41.44s || -0m02.89s 0m12.46s | Arithmetic/Saturated/AddSub | 0m09.77s || +0m02.69s 3m22.38s | Specific/X25519/C64/ladderstep | 3m23.49s || -0m01.11s 0m54.40s | Specific/X25519/C32/fesquare | 0m52.68s || +0m01.71s 0m28.70s | Arithmetic/Karatsuba | 0m27.59s || +0m01.10s 0m10.00s | Arithmetic/Saturated/MontgomeryAPI | 0m08.95s || +0m01.05s 0m08.15s | Specific/X2448/Karatsuba/C64/Synthesis | 0m09.47s || -0m01.32s 0m05.62s | Arithmetic/Saturated/MulSplit | 0m04.28s || +0m01.33s 1m29.44s | Specific/X25519/C32/femul | 1m28.55s || +0m00.89s 0m39.38s | Specific/X25519/C32/freeze | 0m38.62s || +0m00.76s 0m31.54s | Specific/NISTP256/AMD128/femul | 0m31.60s || -0m00.06s 0m24.80s | Specific/X25519/C64/femul | 0m24.10s || +0m00.69s 0m23.82s | Specific/NISTP256/AMD64/fesub | 0m23.52s || +0m00.30s 0m21.81s | Specific/NISTP256/AMD64/feadd | 0m21.90s || -0m00.08s 0m20.30s | Specific/X25519/C64/freeze | 0m20.26s || +0m00.03s 0m20.12s | Specific/X25519/C32/Synthesis | 0m20.77s || -0m00.64s 0m19.12s | Specific/X25519/C64/fesquare | 0m19.02s || +0m00.10s 0m17.28s | Specific/NISTP256/AMD64/feopp | 0m17.68s || -0m00.39s 0m15.99s | Specific/NISTP256/AMD128/fesub | 0m16.03s || -0m00.04s 0m15.88s | Specific/NISTP256/AMD128/feadd | 0m16.56s || -0m00.67s 0m15.03s | Specific/NISTP256/AMD64/fenz | 0m15.00s || +0m00.02s 0m14.18s | Specific/NISTP256/AMD128/fenz | 0m14.12s || +0m00.06s 0m13.46s | Specific/NISTP256/AMD128/feopp | 0m12.88s || +0m00.58s 0m12.15s | Arithmetic/Core | 0m12.03s || +0m00.12s 0m07.82s | Arithmetic/Saturated/Core | 0m07.05s || +0m00.77s 0m07.13s | Specific/NISTP256/AMD64/Synthesis | 0m08.05s || -0m00.92s 0m05.48s | Specific/X25519/C64/Synthesis | 0m05.68s || -0m00.19s 0m04.02s | Specific/Framework/ArithmeticSynthesis/Montgomery | 0m03.89s || +0m00.12s 0m03.52s | Arithmetic/MontgomeryReduction/WordByWord/Proofs | 0m03.34s || +0m00.18s 0m03.32s | Specific/NISTP256/AMD128/Synthesis | 0m03.46s || -0m00.14s 0m02.30s | Specific/Framework/ArithmeticSynthesis/Defaults | 0m02.31s || -0m00.01s 0m02.08s | Arithmetic/Saturated/Freeze | 0m01.94s || +0m00.14s 0m01.66s | Specific/Framework/OutputType | 0m01.66s || +0m00.00s 0m01.54s | Arithmetic/CoreUnfolder | 0m01.43s || +0m00.11s 0m01.35s | Specific/Framework/ArithmeticSynthesis/Karatsuba | 0m01.28s || +0m00.07s 0m01.13s | Arithmetic/Saturated/CoreUnfolder | 0m01.16s || -0m00.03s 0m01.06s | Arithmetic/Saturated/WrappersUnfolder | 0m01.04s || +0m00.02s 0m01.04s | Arithmetic/Saturated/UniformWeight | 0m00.95s || +0m00.09s 0m01.03s | Specific/Framework/ArithmeticSynthesis/Base | 0m01.14s || -0m00.10s 0m01.02s | Specific/Framework/SynthesisFramework | 0m01.04s || -0m00.02s 0m00.97s | Specific/Framework/ArithmeticSynthesis/HelperTactics | 0m01.01s || -0m00.04s 0m00.92s | Specific/Framework/ReificationTypes | 0m00.90s || +0m00.02s 0m00.92s | Specific/Framework/ArithmeticSynthesis/Freeze | 0m00.93s || -0m00.01s 0m00.90s | Arithmetic/Saturated/MulSplitUnfolder | 0m00.83s || +0m00.07s 0m00.83s | Specific/Framework/ReificationTypesPackage | 0m00.79s || +0m00.03s 0m00.83s | Arithmetic/Saturated/FreezeUnfolder | 0m00.86s || -0m00.03s 0m00.82s | Specific/Framework/ArithmeticSynthesis/BasePackage | 0m00.77s || +0m00.04s 0m00.81s | Specific/Framework/ArithmeticSynthesis/SquareFromMul | 0m00.72s || +0m00.09s 0m00.81s | Specific/Framework/ArithmeticSynthesis/LadderstepPackage | 0m00.82s || -0m00.00s 0m00.80s | Specific/Framework/MontgomeryReificationTypesPackage | 0m00.82s || -0m00.01s 0m00.78s | Specific/Framework/ArithmeticSynthesis/MontgomeryPackage | 0m00.79s || -0m00.01s 0m00.78s | Arithmetic/Saturated/Wrappers | 0m00.78s || +0m00.00s 0m00.76s | Specific/Framework/ArithmeticSynthesis/FreezePackage | 0m00.80s || -0m00.04s 0m00.76s | Specific/Framework/ArithmeticSynthesis/DefaultsPackage | 0m00.75s || +0m00.01s 0m00.75s | Specific/Framework/MontgomeryReificationTypes | 0m00.78s || -0m00.03s 0m00.73s | Specific/Framework/ArithmeticSynthesis/Ladderstep | 0m00.77s || -0m00.04s 0m00.73s | Arithmetic/MontgomeryReduction/WordByWord/Definition | 0m00.80s || -0m00.07s 0m00.72s | Arithmetic/Saturated/UniformWeightInstances | 0m00.78s || -0m00.06s 0m00.68s | Specific/Framework/ArithmeticSynthesis/KaratsubaPackage | 0m00.76s || -0m00.07s 0m00.43s | Util/ZUtil/CPS | 0m00.42s || +0m00.01s
Diffstat (limited to 'src/Arithmetic/MontgomeryReduction')
-rw-r--r--src/Arithmetic/MontgomeryReduction/WordByWord/Definition.v6
1 files changed, 4 insertions, 2 deletions
diff --git a/src/Arithmetic/MontgomeryReduction/WordByWord/Definition.v b/src/Arithmetic/MontgomeryReduction/WordByWord/Definition.v
index 9affa82fa..fd4869f23 100644
--- a/src/Arithmetic/MontgomeryReduction/WordByWord/Definition.v
+++ b/src/Arithmetic/MontgomeryReduction/WordByWord/Definition.v
@@ -10,6 +10,7 @@ Require Import Crypto.Arithmetic.MontgomeryReduction.WordByWord.Abstract.Depende
Require Import Crypto.Util.Notations.
Require Import Crypto.Util.LetIn.
Require Import Crypto.Util.ZUtil.Definitions.
+Require Import Crypto.Util.ZUtil.CPS.
Local Open Scope Z_scope.
@@ -47,10 +48,11 @@ Section WordByWordMontgomery.
:= divmod_cps A (fun '(A, a) =>
@scmul_cps r _ a B _ (fun aB => @add_cps r _ S' aB _ (fun S1 =>
divmod_cps S1 (fun '(_, s) =>
- dlet q := fst (Z.mul_split r s k) in
+ Z.mul_split_cps' r s k (fun mul_split_r_s_k =>
+ dlet q := fst mul_split_r_s_k in
@scmul_cps r _ q N _ (fun qN => @add_S1_cps r _ S1 qN _ (fun S2 =>
divmod_cps S2 (fun '(S3, _) =>
- @drop_high_cps (S R_numlimbs) S3 _ (fun S4 => rest (A, S4))))))))).
+ @drop_high_cps (S R_numlimbs) S3 _ (fun S4 => rest (A, S4)))))))))).
Section loop.
Context {A_numlimbs} (A : T A_numlimbs) (B : T R_numlimbs) (k : Z) {cpsT : Type}.