aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Jason Gross <jgross@mit.edu>2019-01-25 16:19:49 -0500
committerGravatar Jason Gross <jasongross9@gmail.com>2019-01-26 18:12:06 -0500
commit90c59282be5ba1f2cd7c2561d17f0559c140c597 (patch)
tree2aa4a1b468c3a848ffb44a46e2b6c5b3cc1630fc /src
parent299aec51224743bea006460af8edc37525a48baa (diff)
Reduce more in mulmod and squaremod
After some debugging with Andres, the issue seems to be that when we have multiple TAPs, the first reduction will leave some of the partial products past the end of the table, because it "folds over" the partial products table at the location of each TAP. In the worst case, if we have a TAP at 1 and a TAP at the second-highest limb, we will have to reduce as many times as there are limbs. To prevent quadratic blowup, we short-circuit the repeat_reduce calculation whenever the list of high terms is nil. We deliberately do not support primes with a TAP in the highest limb. After | File Name | Before || Change | % Change ----------------------------------------------------------------------------------------------------------- 17m49.68s | Total | 17m34.83s || +0m14.84s | +1.40% ----------------------------------------------------------------------------------------------------------- 2m22.88s | PushButtonSynthesis/WordByWordMontgomeryReificationCache.vo | 2m21.60s || +0m01.28s | +0.90% 2m19.62s | Arithmetic.vo | 2m17.66s || +0m01.96s | +1.42% 0m34.02s | PushButtonSynthesis/UnsaturatedSolinasReificationCache.vo | 0m32.24s || +0m01.78s | +5.52% 0m32.06s | ExtractionHaskell/unsaturated_solinas | 0m30.99s || +0m01.07s | +3.45% 0m19.95s | SlowPrimeSynthesisExamples.vo | 0m18.40s || +0m01.55s | +8.42% 3m12.02s | p384_32.c | 3m12.03s || -0m00.00s | -0.00% 1m02.14s | Fancy/Montgomery256.vo | 1m02.09s || +0m00.04s | +0.08% 0m51.24s | Fancy/Barrett256.vo | 0m51.28s || -0m00.03s | -0.07% 0m45.72s | ExtractionHaskell/word_by_word_montgomery | 0m45.05s || +0m00.67s | +1.48% 0m38.67s | p521_32.c | 0m38.24s || +0m00.42s | +1.12% 0m31.94s | p521_64.c | 0m31.38s || +0m00.56s | +1.78% 0m24.85s | ExtractionHaskell/saturated_solinas | 0m24.45s || +0m00.40s | +1.63% 0m23.53s | PushButtonSynthesis/MontgomeryReductionReificationCache.vo | 0m22.79s || +0m00.74s | +3.24% 0m21.96s | PushButtonSynthesis/WordByWordMontgomery.vo | 0m21.88s || +0m00.08s | +0.36% 0m19.60s | PushButtonSynthesis/UnsaturatedSolinas.vo | 0m19.56s || +0m00.04s | +0.20% 0m17.76s | ExtractionOCaml/word_by_word_montgomery | 0m17.26s || +0m00.50s | +2.89% 0m14.90s | p448_solinas_64.c | 0m14.91s || -0m00.00s | -0.06% 0m13.55s | secp256k1_32.c | 0m13.39s || +0m00.16s | +1.19% 0m13.31s | p256_32.c | 0m13.06s || +0m00.25s | +1.91% 0m13.30s | PushButtonSynthesis/BarrettReductionReificationCache.vo | 0m12.44s || +0m00.86s | +6.91% 0m11.47s | p484_64.c | 0m11.31s || +0m00.16s | +1.41% 0m11.43s | ExtractionOCaml/unsaturated_solinas | 0m11.15s || +0m00.27s | +2.51% 0m10.55s | ExtractionOCaml/word_by_word_montgomery.ml | 0m10.66s || -0m00.10s | -1.03% 0m08.42s | ExtractionOCaml/saturated_solinas | 0m08.24s || +0m00.17s | +2.18% 0m07.07s | ExtractionOCaml/unsaturated_solinas.ml | 0m07.04s || +0m00.03s | +0.42% 0m06.66s | COperationSpecifications.vo | 0m06.31s || +0m00.35s | +5.54% 0m06.50s | ExtractionHaskell/word_by_word_montgomery.hs | 0m06.39s || +0m00.11s | +1.72% 0m06.19s | p224_32.c | 0m05.99s || +0m00.20s | +3.33% 0m05.35s | p384_64.c | 0m05.24s || +0m00.10s | +2.09% 0m05.26s | ExtractionOCaml/saturated_solinas.ml | 0m05.24s || +0m00.01s | +0.38% 0m05.24s | ExtractionHaskell/unsaturated_solinas.hs | 0m05.17s || +0m00.07s | +1.35% 0m04.23s | ExtractionHaskell/saturated_solinas.hs | 0m04.12s || +0m00.11s | +2.66% 0m04.14s | PushButtonSynthesis/SaturatedSolinasReificationCache.vo | 0m03.89s || +0m00.24s | +6.42% 0m03.68s | PushButtonSynthesis/Primitives.vo | 0m03.62s || +0m00.06s | +1.65% 0m03.58s | PushButtonSynthesis/SmallExamples.vo | 0m03.30s || +0m00.28s | +8.48% 0m03.49s | PushButtonSynthesis/SaturatedSolinas.vo | 0m03.25s || +0m00.24s | +7.38% 0m02.38s | curve25519_32.c | 0m02.28s || +0m00.10s | +4.38% 0m01.66s | PushButtonSynthesis/BarrettReduction.vo | 0m01.39s || +0m00.27s | +19.42% 0m01.47s | curve25519_64.c | 0m01.42s || +0m00.05s | +3.52% 0m01.34s | CLI.vo | 0m01.45s || -0m00.10s | -7.58% 0m01.32s | PushButtonSynthesis/MontgomeryReduction.vo | 0m01.30s || +0m00.02s | +1.53% 0m01.08s | secp256k1_64.c | 0m01.04s || +0m00.04s | +3.84% 0m01.07s | StandaloneOCamlMain.vo | 0m01.10s || -0m00.03s | -2.72% 0m01.04s | StandaloneHaskellMain.vo | 0m01.12s || -0m00.08s | -7.14% 0m01.04s | p224_64.c | 0m01.14s || -0m00.09s | -8.77% 0m01.00s | p256_64.c | 0m00.98s || +0m00.02s | +2.04%
Diffstat (limited to 'src')
-rw-r--r--src/Arithmetic.v48
1 files changed, 45 insertions, 3 deletions
diff --git a/src/Arithmetic.v b/src/Arithmetic.v
index 4436bdf0e..dc4c5031b 100644
--- a/src/Arithmetic.v
+++ b/src/Arithmetic.v
@@ -178,6 +178,47 @@ Module Associational.
autorewrite with zsimplify_const; rewrite eval_split; trivial.
Qed.
+ (* reduce at most [n] times, stopping early if the high list is nil at any point *)
+ Definition repeat_reduce (n : nat) (s:Z) (c:list _) (p:list _) : list (Z * Z)
+ := nat_rect
+ _
+ (fun p => p)
+ (fun n' repeat_reduce_n' p
+ => let lo_hi := split s p in
+ if (length (snd lo_hi) =? 0)%nat
+ then p
+ else let p := fst lo_hi ++ mul c (snd lo_hi) in
+ repeat_reduce_n' p)
+ n
+ p.
+
+ Lemma repeat_reduce_S_step n s c p
+ : repeat_reduce (S n) s c p
+ = if (length (snd (split s p)) =? 0)%nat
+ then p
+ else repeat_reduce n s c (reduce s c p).
+ Proof using Type. cbv [repeat_reduce]; cbn [nat_rect]; break_innermost_match; auto. Qed.
+
+ Lemma eval_repeat_reduce n s c p (s_nz:s<>0) (modulus_nz:s-eval c<>0) :
+ eval (repeat_reduce n s c p) mod (s - eval c) = eval p mod (s - eval c).
+ Proof using Type.
+ revert p; induction n as [|n IHn]; intro p; [ reflexivity | ];
+ rewrite repeat_reduce_S_step; break_innermost_match;
+ [ reflexivity | rewrite IHn ].
+ now rewrite eval_reduce.
+ Qed.
+ Hint Rewrite eval_repeat_reduce : push_eval.
+
+ Lemma eval_repeat_reduce_adjusted n s c p w c' (s_nz:s<>0) (modulus_nz:s-eval c<>0)
+ (w_mod:w mod s = 0) (w_nz:w <> 0) (Hc' : eval c' = (w / s) * eval c) :
+ eval (repeat_reduce n w c' p) mod (s - eval c) = eval p mod (s - eval c).
+ Proof using Type.
+ revert p; induction n as [|n IHn]; intro p; [ reflexivity | ];
+ rewrite repeat_reduce_S_step; break_innermost_match;
+ [ reflexivity | rewrite IHn ].
+ now rewrite eval_reduce_adjusted.
+ Qed.
+
(*
Definition splitQ (s:Q) (p:list (Z*Z)) : list (Z*Z) * list (Z*Z)
:= let hi_lo := partition (fun t => (fst t * Zpos (Qden s)) mod (Qnum s) =? 0) p in
@@ -789,7 +830,7 @@ Module Positional.
:= let a_a := to_associational n a in
let b_a := to_associational n b in
let ab_a := Associational.mul a_a b_a in
- let abm_a := Associational.reduce s c ab_a in
+ let abm_a := Associational.repeat_reduce n s c ab_a in
from_associational n abm_a.
Lemma eval_mulmod n (f g:list Z)
(Hf : length f = n) (Hg : length g = n) :
@@ -797,13 +838,14 @@ Module Positional.
= (eval n f * eval n g) mod (s - Associational.eval c).
Proof using m_nz s_nz weight_0 weight_nz. cbv [mulmod]; push; trivial.
destruct f, g; simpl in *; [ right; subst n | left; try omega.. ].
- clear; cbv -[Associational.reduce].
+ clear; cbv -[Associational.repeat_reduce].
induction c as [|?? IHc]; simpl; trivial. Qed.
Definition squaremod (n:nat) (a:list Z) : list Z
:= let a_a := to_associational n a in
let aa_a := Associational.reduce_square s c a_a in
- from_associational n aa_a.
+ let aam_a := Associational.repeat_reduce (pred n) s c aa_a in
+ from_associational n aam_a.
Lemma eval_squaremod n (f:list Z)
(Hf : length f = n) :
eval n (squaremod n f) mod (s - Associational.eval c)