diff options
author | Jason Gross <jgross@mit.edu> | 2019-01-25 16:19:49 -0500 |
---|---|---|
committer | Jason Gross <jasongross9@gmail.com> | 2019-01-26 18:12:06 -0500 |
commit | 90c59282be5ba1f2cd7c2561d17f0559c140c597 (patch) | |
tree | 2aa4a1b468c3a848ffb44a46e2b6c5b3cc1630fc /src | |
parent | 299aec51224743bea006460af8edc37525a48baa (diff) |
Reduce more in mulmod and squaremod
After some debugging with Andres, the issue seems to be that when we
have multiple TAPs, the first reduction will leave some of the partial
products past the end of the table, because it "folds over" the partial
products table at the location of each TAP. In the worst case, if we
have a TAP at 1 and a TAP at the second-highest limb, we will have to
reduce as many times as there are limbs. To prevent quadratic blowup,
we short-circuit the repeat_reduce calculation whenever the list of high
terms is nil. We deliberately do not support primes with a TAP in the
highest limb.
After | File Name | Before || Change | % Change
-----------------------------------------------------------------------------------------------------------
17m49.68s | Total | 17m34.83s || +0m14.84s | +1.40%
-----------------------------------------------------------------------------------------------------------
2m22.88s | PushButtonSynthesis/WordByWordMontgomeryReificationCache.vo | 2m21.60s || +0m01.28s | +0.90%
2m19.62s | Arithmetic.vo | 2m17.66s || +0m01.96s | +1.42%
0m34.02s | PushButtonSynthesis/UnsaturatedSolinasReificationCache.vo | 0m32.24s || +0m01.78s | +5.52%
0m32.06s | ExtractionHaskell/unsaturated_solinas | 0m30.99s || +0m01.07s | +3.45%
0m19.95s | SlowPrimeSynthesisExamples.vo | 0m18.40s || +0m01.55s | +8.42%
3m12.02s | p384_32.c | 3m12.03s || -0m00.00s | -0.00%
1m02.14s | Fancy/Montgomery256.vo | 1m02.09s || +0m00.04s | +0.08%
0m51.24s | Fancy/Barrett256.vo | 0m51.28s || -0m00.03s | -0.07%
0m45.72s | ExtractionHaskell/word_by_word_montgomery | 0m45.05s || +0m00.67s | +1.48%
0m38.67s | p521_32.c | 0m38.24s || +0m00.42s | +1.12%
0m31.94s | p521_64.c | 0m31.38s || +0m00.56s | +1.78%
0m24.85s | ExtractionHaskell/saturated_solinas | 0m24.45s || +0m00.40s | +1.63%
0m23.53s | PushButtonSynthesis/MontgomeryReductionReificationCache.vo | 0m22.79s || +0m00.74s | +3.24%
0m21.96s | PushButtonSynthesis/WordByWordMontgomery.vo | 0m21.88s || +0m00.08s | +0.36%
0m19.60s | PushButtonSynthesis/UnsaturatedSolinas.vo | 0m19.56s || +0m00.04s | +0.20%
0m17.76s | ExtractionOCaml/word_by_word_montgomery | 0m17.26s || +0m00.50s | +2.89%
0m14.90s | p448_solinas_64.c | 0m14.91s || -0m00.00s | -0.06%
0m13.55s | secp256k1_32.c | 0m13.39s || +0m00.16s | +1.19%
0m13.31s | p256_32.c | 0m13.06s || +0m00.25s | +1.91%
0m13.30s | PushButtonSynthesis/BarrettReductionReificationCache.vo | 0m12.44s || +0m00.86s | +6.91%
0m11.47s | p484_64.c | 0m11.31s || +0m00.16s | +1.41%
0m11.43s | ExtractionOCaml/unsaturated_solinas | 0m11.15s || +0m00.27s | +2.51%
0m10.55s | ExtractionOCaml/word_by_word_montgomery.ml | 0m10.66s || -0m00.10s | -1.03%
0m08.42s | ExtractionOCaml/saturated_solinas | 0m08.24s || +0m00.17s | +2.18%
0m07.07s | ExtractionOCaml/unsaturated_solinas.ml | 0m07.04s || +0m00.03s | +0.42%
0m06.66s | COperationSpecifications.vo | 0m06.31s || +0m00.35s | +5.54%
0m06.50s | ExtractionHaskell/word_by_word_montgomery.hs | 0m06.39s || +0m00.11s | +1.72%
0m06.19s | p224_32.c | 0m05.99s || +0m00.20s | +3.33%
0m05.35s | p384_64.c | 0m05.24s || +0m00.10s | +2.09%
0m05.26s | ExtractionOCaml/saturated_solinas.ml | 0m05.24s || +0m00.01s | +0.38%
0m05.24s | ExtractionHaskell/unsaturated_solinas.hs | 0m05.17s || +0m00.07s | +1.35%
0m04.23s | ExtractionHaskell/saturated_solinas.hs | 0m04.12s || +0m00.11s | +2.66%
0m04.14s | PushButtonSynthesis/SaturatedSolinasReificationCache.vo | 0m03.89s || +0m00.24s | +6.42%
0m03.68s | PushButtonSynthesis/Primitives.vo | 0m03.62s || +0m00.06s | +1.65%
0m03.58s | PushButtonSynthesis/SmallExamples.vo | 0m03.30s || +0m00.28s | +8.48%
0m03.49s | PushButtonSynthesis/SaturatedSolinas.vo | 0m03.25s || +0m00.24s | +7.38%
0m02.38s | curve25519_32.c | 0m02.28s || +0m00.10s | +4.38%
0m01.66s | PushButtonSynthesis/BarrettReduction.vo | 0m01.39s || +0m00.27s | +19.42%
0m01.47s | curve25519_64.c | 0m01.42s || +0m00.05s | +3.52%
0m01.34s | CLI.vo | 0m01.45s || -0m00.10s | -7.58%
0m01.32s | PushButtonSynthesis/MontgomeryReduction.vo | 0m01.30s || +0m00.02s | +1.53%
0m01.08s | secp256k1_64.c | 0m01.04s || +0m00.04s | +3.84%
0m01.07s | StandaloneOCamlMain.vo | 0m01.10s || -0m00.03s | -2.72%
0m01.04s | StandaloneHaskellMain.vo | 0m01.12s || -0m00.08s | -7.14%
0m01.04s | p224_64.c | 0m01.14s || -0m00.09s | -8.77%
0m01.00s | p256_64.c | 0m00.98s || +0m00.02s | +2.04%
Diffstat (limited to 'src')
-rw-r--r-- | src/Arithmetic.v | 48 |
1 files changed, 45 insertions, 3 deletions
diff --git a/src/Arithmetic.v b/src/Arithmetic.v index 4436bdf0e..dc4c5031b 100644 --- a/src/Arithmetic.v +++ b/src/Arithmetic.v @@ -178,6 +178,47 @@ Module Associational. autorewrite with zsimplify_const; rewrite eval_split; trivial. Qed. + (* reduce at most [n] times, stopping early if the high list is nil at any point *) + Definition repeat_reduce (n : nat) (s:Z) (c:list _) (p:list _) : list (Z * Z) + := nat_rect + _ + (fun p => p) + (fun n' repeat_reduce_n' p + => let lo_hi := split s p in + if (length (snd lo_hi) =? 0)%nat + then p + else let p := fst lo_hi ++ mul c (snd lo_hi) in + repeat_reduce_n' p) + n + p. + + Lemma repeat_reduce_S_step n s c p + : repeat_reduce (S n) s c p + = if (length (snd (split s p)) =? 0)%nat + then p + else repeat_reduce n s c (reduce s c p). + Proof using Type. cbv [repeat_reduce]; cbn [nat_rect]; break_innermost_match; auto. Qed. + + Lemma eval_repeat_reduce n s c p (s_nz:s<>0) (modulus_nz:s-eval c<>0) : + eval (repeat_reduce n s c p) mod (s - eval c) = eval p mod (s - eval c). + Proof using Type. + revert p; induction n as [|n IHn]; intro p; [ reflexivity | ]; + rewrite repeat_reduce_S_step; break_innermost_match; + [ reflexivity | rewrite IHn ]. + now rewrite eval_reduce. + Qed. + Hint Rewrite eval_repeat_reduce : push_eval. + + Lemma eval_repeat_reduce_adjusted n s c p w c' (s_nz:s<>0) (modulus_nz:s-eval c<>0) + (w_mod:w mod s = 0) (w_nz:w <> 0) (Hc' : eval c' = (w / s) * eval c) : + eval (repeat_reduce n w c' p) mod (s - eval c) = eval p mod (s - eval c). + Proof using Type. + revert p; induction n as [|n IHn]; intro p; [ reflexivity | ]; + rewrite repeat_reduce_S_step; break_innermost_match; + [ reflexivity | rewrite IHn ]. + now rewrite eval_reduce_adjusted. + Qed. + (* Definition splitQ (s:Q) (p:list (Z*Z)) : list (Z*Z) * list (Z*Z) := let hi_lo := partition (fun t => (fst t * Zpos (Qden s)) mod (Qnum s) =? 0) p in @@ -789,7 +830,7 @@ Module Positional. := let a_a := to_associational n a in let b_a := to_associational n b in let ab_a := Associational.mul a_a b_a in - let abm_a := Associational.reduce s c ab_a in + let abm_a := Associational.repeat_reduce n s c ab_a in from_associational n abm_a. Lemma eval_mulmod n (f g:list Z) (Hf : length f = n) (Hg : length g = n) : @@ -797,13 +838,14 @@ Module Positional. = (eval n f * eval n g) mod (s - Associational.eval c). Proof using m_nz s_nz weight_0 weight_nz. cbv [mulmod]; push; trivial. destruct f, g; simpl in *; [ right; subst n | left; try omega.. ]. - clear; cbv -[Associational.reduce]. + clear; cbv -[Associational.repeat_reduce]. induction c as [|?? IHc]; simpl; trivial. Qed. Definition squaremod (n:nat) (a:list Z) : list Z := let a_a := to_associational n a in let aa_a := Associational.reduce_square s c a_a in - from_associational n aa_a. + let aam_a := Associational.repeat_reduce (pred n) s c aa_a in + from_associational n aam_a. Lemma eval_squaremod n (f:list Z) (Hf : length f = n) : eval n (squaremod n f) mod (s - Associational.eval c) |