aboutsummaryrefslogtreecommitdiff
path: root/src/Arithmetic/Core.v
diff options
context:
space:
mode:
author: Jason Gross <jgross@mit.edu> 2017-10-19 14:44:48 -0400
committer: Jason Gross <jgross@mit.edu> 2017-10-19 15:40:23 -0400
commit: 7e939cd63236d0a6a492ddff5015daf3f706a3bc (patch)
tree: fa19e772dc624eb7899017b55e527de184e7bf8f /src/Arithmetic/Core.v
parent: 79b586e4589f56d081301de92b305569c1077ed2 (diff)
Switch arithmetic to cps for Z * Z under the hood
This is in preparation for writing a ~compiler for the arithmetic things to expression trees.

I'm not sure what's up with femul in the table below; I ran it again and got:
After:
src/Specific/NISTP256/AMD64/femul (real: 115.70, user: 115.25, sys: 0.44, mem: 3571448 ko)
Before:
src/Specific/NISTP256/AMD64/femul (real: 118.49, user: 117.99, sys: 0.43, mem: 3581612 ko)

After | File Name | Before || Change
---------------------------------------------------------------------------------------------
17m02.82s | Total | 16m36.20s || +0m26.61s
---------------------------------------------------------------------------------------------
2m27.04s | Specific/NISTP256/AMD64/femul | 2m04.60s || +0m22.43s
1m38.55s | Specific/X2448/Karatsuba/C64/femul | 1m41.44s || -0m02.89s
0m12.46s | Arithmetic/Saturated/AddSub | 0m09.77s || +0m02.69s
3m22.38s | Specific/X25519/C64/ladderstep | 3m23.49s || -0m01.11s
0m54.40s | Specific/X25519/C32/fesquare | 0m52.68s || +0m01.71s
0m28.70s | Arithmetic/Karatsuba | 0m27.59s || +0m01.10s
0m10.00s | Arithmetic/Saturated/MontgomeryAPI | 0m08.95s || +0m01.05s
0m08.15s | Specific/X2448/Karatsuba/C64/Synthesis | 0m09.47s || -0m01.32s
0m05.62s | Arithmetic/Saturated/MulSplit | 0m04.28s || +0m01.33s
1m29.44s | Specific/X25519/C32/femul | 1m28.55s || +0m00.89s
0m39.38s | Specific/X25519/C32/freeze | 0m38.62s || +0m00.76s
0m31.54s | Specific/NISTP256/AMD128/femul | 0m31.60s || -0m00.06s
0m24.80s | Specific/X25519/C64/femul | 0m24.10s || +0m00.69s
0m23.82s | Specific/NISTP256/AMD64/fesub | 0m23.52s || +0m00.30s
0m21.81s | Specific/NISTP256/AMD64/feadd | 0m21.90s || -0m00.08s
0m20.30s | Specific/X25519/C64/freeze | 0m20.26s || +0m00.03s
0m20.12s | Specific/X25519/C32/Synthesis | 0m20.77s || -0m00.64s
0m19.12s | Specific/X25519/C64/fesquare | 0m19.02s || +0m00.10s
0m17.28s | Specific/NISTP256/AMD64/feopp | 0m17.68s || -0m00.39s
0m15.99s | Specific/NISTP256/AMD128/fesub | 0m16.03s || -0m00.04s
0m15.88s | Specific/NISTP256/AMD128/feadd | 0m16.56s || -0m00.67s
0m15.03s | Specific/NISTP256/AMD64/fenz | 0m15.00s || +0m00.02s
0m14.18s | Specific/NISTP256/AMD128/fenz | 0m14.12s || +0m00.06s
0m13.46s | Specific/NISTP256/AMD128/feopp | 0m12.88s || +0m00.58s
0m12.15s | Arithmetic/Core | 0m12.03s || +0m00.12s
0m07.82s | Arithmetic/Saturated/Core | 0m07.05s || +0m00.77s
0m07.13s | Specific/NISTP256/AMD64/Synthesis | 0m08.05s || -0m00.92s
0m05.48s | Specific/X25519/C64/Synthesis | 0m05.68s || -0m00.19s
0m04.02s | Specific/Framework/ArithmeticSynthesis/Montgomery | 0m03.89s || +0m00.12s
0m03.52s | Arithmetic/MontgomeryReduction/WordByWord/Proofs | 0m03.34s || +0m00.18s
0m03.32s | Specific/NISTP256/AMD128/Synthesis | 0m03.46s || -0m00.14s
0m02.30s | Specific/Framework/ArithmeticSynthesis/Defaults | 0m02.31s || -0m00.01s
0m02.08s | Arithmetic/Saturated/Freeze | 0m01.94s || +0m00.14s
0m01.66s | Specific/Framework/OutputType | 0m01.66s || +0m00.00s
0m01.54s | Arithmetic/CoreUnfolder | 0m01.43s || +0m00.11s
0m01.35s | Specific/Framework/ArithmeticSynthesis/Karatsuba | 0m01.28s || +0m00.07s
0m01.13s | Arithmetic/Saturated/CoreUnfolder | 0m01.16s || -0m00.03s
0m01.06s | Arithmetic/Saturated/WrappersUnfolder | 0m01.04s || +0m00.02s
0m01.04s | Arithmetic/Saturated/UniformWeight | 0m00.95s || +0m00.09s
0m01.03s | Specific/Framework/ArithmeticSynthesis/Base | 0m01.14s || -0m00.10s
0m01.02s | Specific/Framework/SynthesisFramework | 0m01.04s || -0m00.02s
0m00.97s | Specific/Framework/ArithmeticSynthesis/HelperTactics | 0m01.01s || -0m00.04s
0m00.92s | Specific/Framework/ReificationTypes | 0m00.90s || +0m00.02s
0m00.92s | Specific/Framework/ArithmeticSynthesis/Freeze | 0m00.93s || -0m00.01s
0m00.90s | Arithmetic/Saturated/MulSplitUnfolder | 0m00.83s || +0m00.07s
0m00.83s | Specific/Framework/ReificationTypesPackage | 0m00.79s || +0m00.03s
0m00.83s | Arithmetic/Saturated/FreezeUnfolder | 0m00.86s || -0m00.03s
0m00.82s | Specific/Framework/ArithmeticSynthesis/BasePackage | 0m00.77s || +0m00.04s
0m00.81s | Specific/Framework/ArithmeticSynthesis/SquareFromMul | 0m00.72s || +0m00.09s
0m00.81s | Specific/Framework/ArithmeticSynthesis/LadderstepPackage | 0m00.82s || -0m00.00s
0m00.80s | Specific/Framework/MontgomeryReificationTypesPackage | 0m00.82s || -0m00.01s
0m00.78s | Specific/Framework/ArithmeticSynthesis/MontgomeryPackage | 0m00.79s || -0m00.01s
0m00.78s | Arithmetic/Saturated/Wrappers | 0m00.78s || +0m00.00s
0m00.76s | Specific/Framework/ArithmeticSynthesis/FreezePackage | 0m00.80s || -0m00.04s
0m00.76s | Specific/Framework/ArithmeticSynthesis/DefaultsPackage | 0m00.75s || +0m00.01s
0m00.75s | Specific/Framework/MontgomeryReificationTypes | 0m00.78s || -0m00.03s
0m00.73s | Specific/Framework/ArithmeticSynthesis/Ladderstep | 0m00.77s || -0m00.04s
0m00.73s | Arithmetic/MontgomeryReduction/WordByWord/Definition | 0m00.80s || -0m00.07s
0m00.72s | Arithmetic/Saturated/UniformWeightInstances | 0m00.78s || -0m00.06s
0m00.68s | Specific/Framework/ArithmeticSynthesis/KaratsubaPackage | 0m00.76s || -0m00.07s
0m00.43s | Util/ZUtil/CPS | 0m00.42s || +0m00.01s
Diffstat (limited to 'src/Arithmetic/Core.v')
-rw-r--r-- src/Arithmetic/Core.v | 73
1 file changed, 48 insertions, 25 deletions
diff --git a/src/Arithmetic/Core.v b/src/Arithmetic/Core.v
index 430e1c19a..f2d9ee00b 100644
--- a/src/Arithmetic/Core.v
+++ b/src/Arithmetic/Core.v
@@ -585,9 +585,10 @@ Module B.
Context {T : Type}.
Fixpoint place_cps (t:limb) (i:nat) (f:nat * Z->T) :=
- if dec (fst t mod weight i = 0)
+ Z.eqb_cps (fst t mod weight i) 0 (fun eqb =>
+ if eqb
then f (i, let c := fst t / weight i in (c * snd t)%RT)
- else match i with S i' => place_cps t i' f | O => f (O, fst t * snd t)%RT end.
+ else match i with S i' => place_cps t i' f | O => f (O, fst t * snd t)%RT end).
End place_cps.
Definition place t i := place_cps t i id.
@@ -599,12 +600,13 @@ Module B.
Lemma place_cps_in_range (t:limb) (n:nat)
: (fst (place_cps t n id) < S n)%nat.
- Proof using Type. induction n; simpl; break_match; simpl; omega. Qed.
+ Proof using Type. induction n; simpl; cbv [Z.eqb_cps]; break_match; simpl; omega. Qed.
Lemma weight_place_cps t i
: weight (fst (place_cps t i id)) * snd (place_cps t i id)
= fst t * snd t.
Proof using Type*.
- induction i; cbv [id]; simpl place_cps; break_match;
+ induction i; cbv [id]; simpl place_cps; cbv [Z.eqb_cps]; break_match;
+ Z.ltb_to_lt;
autorewrite with cancel_pair;
try match goal with [H:_|-_] => apply Z_div_exact_full_2 in H end;
nsatz || auto.
@@ -962,38 +964,59 @@ End B.
(* Modulo and div that do shifts if possible, otherwise normal mod/div *)
Section DivMod.
- Definition modulo (a b : Z) : Z :=
- if dec (2 ^ (Z.log2 b) = b)
- then let x := (Z.ones (Z.log2 b)) in (a &' x)%RT
- else Z.modulo a b.
-
- Definition div (a b : Z) : Z :=
- if dec (2 ^ (Z.log2 b) = b)
- then let x := Z.log2 b in (a >> x)%RT
- else Z.div a b.
-
- Lemma div_correct a b : div a b = Z.div a b.
+ Definition modulo_cps {T} (a b : Z) (f : Z -> T) : T :=
+ Z.eqb_cps (2 ^ (Z.log2 b)) b (fun eqb =>
+ if eqb
+ then let x := (Z.ones (Z.log2 b)) in f (a &' x)%RT
+ else f (Z.modulo a b)).
+
+ Definition div_cps {T} (a b : Z) (f : Z -> T) : T :=
+ Z.eqb_cps (2 ^ (Z.log2 b)) b (fun eqb =>
+ if eqb
+ then let x := Z.log2 b in f ((a >> x)%RT)
+ else f (Z.div a b)).
+
+ Definition modulo (a b : Z) : Z := modulo_cps a b id.
+ Definition div (a b : Z) : Z := div_cps a b id.
+
+ Lemma modulo_id {T} a b f
+ : @modulo_cps T a b f = f (modulo a b).
+ Proof. cbv [modulo_cps modulo]; autorewrite with uncps; break_match; reflexivity. Qed.
+ Hint Opaque modulo : uncps.
+ Hint Rewrite @modulo_id : uncps.
+
+ Lemma div_id {T} a b f
+ : @div_cps T a b f = f (div a b).
+ Proof. cbv [div_cps div]; autorewrite with uncps; break_match; reflexivity. Qed.
+ Hint Opaque div : uncps.
+ Hint Rewrite @div_id : uncps.
+
+ Lemma div_cps_correct {T} a b f : @div_cps T a b f = f (Z.div a b).
Proof.
- cbv [div]; intros. break_match; try reflexivity.
+ cbv [div_cps Z.eqb_cps]; intros. break_match; try reflexivity.
rewrite Z.shiftr_div_pow2 by apply Z.log2_nonneg.
- congruence.
+ Z.ltb_to_lt; congruence.
Qed.
- Lemma modulo_correct a b : modulo a b = Z.modulo a b.
+ Lemma modulo_cps_correct {T} a b f : @modulo_cps T a b f = f (Z.modulo a b).
Proof.
- cbv [modulo]; intros. break_match; try reflexivity.
+ cbv [modulo_cps Z.eqb_cps]; intros. break_match; try reflexivity.
rewrite Z.land_ones by apply Z.log2_nonneg.
- congruence.
+ Z.ltb_to_lt; congruence.
Qed.
+ Definition div_correct a b : div a b = Z.div a b := div_cps_correct a b id.
+ Definition modulo_correct a b : modulo a b = Z.modulo a b := modulo_cps_correct a b id.
+
Lemma div_mod a b (H:b <> 0) : a = b * div a b + modulo a b.
Proof.
- cbv [div modulo]; intros. break_match; auto using Z.div_mod.
- rewrite Z.land_ones, Z.shiftr_div_pow2 by apply Z.log2_nonneg.
- pose proof (Z.div_mod a b H). congruence.
+ rewrite div_correct, modulo_correct; auto using Z.div_mod.
Qed.
End DivMod.
+Hint Opaque div modulo : uncps.
+Hint Rewrite @div_id @modulo_id : uncps.
+
Import B.
Create HintDb basesystem_partial_evaluation_unfolder.
@@ -1045,7 +1068,7 @@ Hint Unfold
Positional.eval_from
Positional.select_cps
Positional.select
- modulo div
+ modulo div modulo_cps div_cps
id_tuple_with_alt id_tuple'_with_alt
Z.add_get_carry_full Z.add_get_carry_full_cps
: basesystem_partial_evaluation_unfolder.
@@ -1055,7 +1078,7 @@ Hint Unfold
CPSUtil.Tuple.mapi_with_cps CPSUtil.Tuple.mapi_with'_cps CPSUtil.flat_map_cps CPSUtil.on_tuple_cps CPSUtil.fold_right_cps2
Decidable.dec Decidable.dec_eq_Z
id_tuple_with_alt id_tuple'_with_alt
- Z.add_get_carry_full Z.add_get_carry_full_cps Z.mul_split Z.mul_split_cps
+ Z.add_get_carry_full Z.add_get_carry_full_cps Z.mul_split Z.mul_split_cps Z.mul_split_cps'
: basesystem_partial_evaluation_unfolder.