From 7e939cd63236d0a6a492ddff5015daf3f706a3bc Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Thu, 19 Oct 2017 14:44:48 -0400 Subject: Switch arithmetic to cps for Z * Z under the hood This is in preparation for writing a ~compiler for the arithmetic things to expression trees. I'm not sure what's up with femul in the table below; I ran it again and got: After: src/Specific/NISTP256/AMD64/femul (real: 115.70, user: 115.25, sys: 0.44, mem: 3571448 ko) Before: src/Specific/NISTP256/AMD64/femul (real: 118.49, user: 117.99, sys: 0.43, mem: 3581612 ko) After | File Name | Before || Change --------------------------------------------------------------------------------------------- 17m02.82s | Total | 16m36.20s || +0m26.61s --------------------------------------------------------------------------------------------- 2m27.04s | Specific/NISTP256/AMD64/femul | 2m04.60s || +0m22.43s 1m38.55s | Specific/X2448/Karatsuba/C64/femul | 1m41.44s || -0m02.89s 0m12.46s | Arithmetic/Saturated/AddSub | 0m09.77s || +0m02.69s 3m22.38s | Specific/X25519/C64/ladderstep | 3m23.49s || -0m01.11s 0m54.40s | Specific/X25519/C32/fesquare | 0m52.68s || +0m01.71s 0m28.70s | Arithmetic/Karatsuba | 0m27.59s || +0m01.10s 0m10.00s | Arithmetic/Saturated/MontgomeryAPI | 0m08.95s || +0m01.05s 0m08.15s | Specific/X2448/Karatsuba/C64/Synthesis | 0m09.47s || -0m01.32s 0m05.62s | Arithmetic/Saturated/MulSplit | 0m04.28s || +0m01.33s 1m29.44s | Specific/X25519/C32/femul | 1m28.55s || +0m00.89s 0m39.38s | Specific/X25519/C32/freeze | 0m38.62s || +0m00.76s 0m31.54s | Specific/NISTP256/AMD128/femul | 0m31.60s || -0m00.06s 0m24.80s | Specific/X25519/C64/femul | 0m24.10s || +0m00.69s 0m23.82s | Specific/NISTP256/AMD64/fesub | 0m23.52s || +0m00.30s 0m21.81s | Specific/NISTP256/AMD64/feadd | 0m21.90s || -0m00.08s 0m20.30s | Specific/X25519/C64/freeze | 0m20.26s || +0m00.03s 0m20.12s | Specific/X25519/C32/Synthesis | 0m20.77s || -0m00.64s 0m19.12s | Specific/X25519/C64/fesquare | 0m19.02s || +0m00.10s 0m17.28s | Specific/NISTP256/AMD64/feopp | 0m17.68s || -0m00.39s 0m15.99s | Specific/NISTP256/AMD128/fesub | 0m16.03s || -0m00.04s 0m15.88s | Specific/NISTP256/AMD128/feadd | 0m16.56s || -0m00.67s 0m15.03s | Specific/NISTP256/AMD64/fenz | 0m15.00s || +0m00.02s 0m14.18s | Specific/NISTP256/AMD128/fenz | 0m14.12s || +0m00.06s 0m13.46s | Specific/NISTP256/AMD128/feopp | 0m12.88s || +0m00.58s 0m12.15s | Arithmetic/Core | 0m12.03s || +0m00.12s 0m07.82s | Arithmetic/Saturated/Core | 0m07.05s || +0m00.77s 0m07.13s | Specific/NISTP256/AMD64/Synthesis | 0m08.05s || -0m00.92s 0m05.48s | Specific/X25519/C64/Synthesis | 0m05.68s || -0m00.19s 0m04.02s | Specific/Framework/ArithmeticSynthesis/Montgomery | 0m03.89s || +0m00.12s 0m03.52s | Arithmetic/MontgomeryReduction/WordByWord/Proofs | 0m03.34s || +0m00.18s 0m03.32s | Specific/NISTP256/AMD128/Synthesis | 0m03.46s || -0m00.14s 0m02.30s | Specific/Framework/ArithmeticSynthesis/Defaults | 0m02.31s || -0m00.01s 0m02.08s | Arithmetic/Saturated/Freeze | 0m01.94s || +0m00.14s 0m01.66s | Specific/Framework/OutputType | 0m01.66s || +0m00.00s 0m01.54s | Arithmetic/CoreUnfolder | 0m01.43s || +0m00.11s 0m01.35s | Specific/Framework/ArithmeticSynthesis/Karatsuba | 0m01.28s || +0m00.07s 0m01.13s | Arithmetic/Saturated/CoreUnfolder | 0m01.16s || -0m00.03s 0m01.06s | Arithmetic/Saturated/WrappersUnfolder | 0m01.04s || +0m00.02s 0m01.04s | Arithmetic/Saturated/UniformWeight | 0m00.95s || +0m00.09s 0m01.03s | Specific/Framework/ArithmeticSynthesis/Base | 0m01.14s || -0m00.10s 0m01.02s | Specific/Framework/SynthesisFramework | 0m01.04s || -0m00.02s 0m00.97s | Specific/Framework/ArithmeticSynthesis/HelperTactics | 0m01.01s || -0m00.04s 0m00.92s | Specific/Framework/ReificationTypes | 0m00.90s || +0m00.02s 0m00.92s | Specific/Framework/ArithmeticSynthesis/Freeze | 0m00.93s || -0m00.01s 0m00.90s | Arithmetic/Saturated/MulSplitUnfolder | 0m00.83s || +0m00.07s 0m00.83s | Specific/Framework/ReificationTypesPackage | 0m00.79s || +0m00.03s 0m00.83s | Arithmetic/Saturated/FreezeUnfolder | 0m00.86s || -0m00.03s 0m00.82s | Specific/Framework/ArithmeticSynthesis/BasePackage | 0m00.77s || +0m00.04s 0m00.81s | Specific/Framework/ArithmeticSynthesis/SquareFromMul | 0m00.72s || +0m00.09s 0m00.81s | Specific/Framework/ArithmeticSynthesis/LadderstepPackage | 0m00.82s || -0m00.00s 0m00.80s | Specific/Framework/MontgomeryReificationTypesPackage | 0m00.82s || -0m00.01s 0m00.78s | Specific/Framework/ArithmeticSynthesis/MontgomeryPackage | 0m00.79s || -0m00.01s 0m00.78s | Arithmetic/Saturated/Wrappers | 0m00.78s || +0m00.00s 0m00.76s | Specific/Framework/ArithmeticSynthesis/FreezePackage | 0m00.80s || -0m00.04s 0m00.76s | Specific/Framework/ArithmeticSynthesis/DefaultsPackage | 0m00.75s || +0m00.01s 0m00.75s | Specific/Framework/MontgomeryReificationTypes | 0m00.78s || -0m00.03s 0m00.73s | Specific/Framework/ArithmeticSynthesis/Ladderstep | 0m00.77s || -0m00.04s 0m00.73s | Arithmetic/MontgomeryReduction/WordByWord/Definition | 0m00.80s || -0m00.07s 0m00.72s | Arithmetic/Saturated/UniformWeightInstances | 0m00.78s || -0m00.06s 0m00.68s | Specific/Framework/ArithmeticSynthesis/KaratsubaPackage | 0m00.76s || -0m00.07s 0m00.43s | Util/ZUtil/CPS | 0m00.42s || +0m00.01s --- src/Arithmetic/Core.v | 73 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 25 deletions(-) (limited to 'src/Arithmetic/Core.v') diff --git a/src/Arithmetic/Core.v b/src/Arithmetic/Core.v index 430e1c19a..f2d9ee00b 100644 --- a/src/Arithmetic/Core.v +++ b/src/Arithmetic/Core.v @@ -585,9 +585,10 @@ Module B. Context {T : Type}. Fixpoint place_cps (t:limb) (i:nat) (f:nat * Z->T) := - if dec (fst t mod weight i = 0) + Z.eqb_cps (fst t mod weight i) 0 (fun eqb => + if eqb then f (i, let c := fst t / weight i in (c * snd t)%RT) - else match i with S i' => place_cps t i' f | O => f (O, fst t * snd t)%RT end. + else match i with S i' => place_cps t i' f | O => f (O, fst t * snd t)%RT end). End place_cps. Definition place t i := place_cps t i id. @@ -599,12 +600,13 @@ Module B. Lemma place_cps_in_range (t:limb) (n:nat) : (fst (place_cps t n id) < S n)%nat. - Proof using Type. induction n; simpl; break_match; simpl; omega. Qed. + Proof using Type. induction n; simpl; cbv [Z.eqb_cps]; break_match; simpl; omega. Qed. Lemma weight_place_cps t i : weight (fst (place_cps t i id)) * snd (place_cps t i id) = fst t * snd t. Proof using Type*. - induction i; cbv [id]; simpl place_cps; break_match; + induction i; cbv [id]; simpl place_cps; cbv [Z.eqb_cps]; break_match; + Z.ltb_to_lt; autorewrite with cancel_pair; try match goal with [H:_|-_] => apply Z_div_exact_full_2 in H end; nsatz || auto. @@ -962,38 +964,59 @@ End B. (* Modulo and div that do shifts if possible, otherwise normal mod/div *) Section DivMod. - Definition modulo (a b : Z) : Z := - if dec (2 ^ (Z.log2 b) = b) - then let x := (Z.ones (Z.log2 b)) in (a &' x)%RT - else Z.modulo a b. - - Definition div (a b : Z) : Z := - if dec (2 ^ (Z.log2 b) = b) - then let x := Z.log2 b in (a >> x)%RT - else Z.div a b. - - Lemma div_correct a b : div a b = Z.div a b. + Definition modulo_cps {T} (a b : Z) (f : Z -> T) : T := + Z.eqb_cps (2 ^ (Z.log2 b)) b (fun eqb => + if eqb + then let x := (Z.ones (Z.log2 b)) in f (a &' x)%RT + else f (Z.modulo a b)). + + Definition div_cps {T} (a b : Z) (f : Z -> T) : T := + Z.eqb_cps (2 ^ (Z.log2 b)) b (fun eqb => + if eqb + then let x := Z.log2 b in f ((a >> x)%RT) + else f (Z.div a b)). + + Definition modulo (a b : Z) : Z := modulo_cps a b id. + Definition div (a b : Z) : Z := div_cps a b id. + + Lemma modulo_id {T} a b f + : @modulo_cps T a b f = f (modulo a b). + Proof. cbv [modulo_cps modulo]; autorewrite with uncps; break_match; reflexivity. Qed. + Hint Opaque modulo : uncps. + Hint Rewrite @modulo_id : uncps. + + Lemma div_id {T} a b f + : @div_cps T a b f = f (div a b). + Proof. cbv [div_cps div]; autorewrite with uncps; break_match; reflexivity. Qed. + Hint Opaque div : uncps. + Hint Rewrite @div_id : uncps. + + Lemma div_cps_correct {T} a b f : @div_cps T a b f = f (Z.div a b). Proof. - cbv [div]; intros. break_match; try reflexivity. + cbv [div_cps Z.eqb_cps]; intros. break_match; try reflexivity. rewrite Z.shiftr_div_pow2 by apply Z.log2_nonneg. - congruence. + Z.ltb_to_lt; congruence. Qed. - Lemma modulo_correct a b : modulo a b = Z.modulo a b. + Lemma modulo_cps_correct {T} a b f : @modulo_cps T a b f = f (Z.modulo a b). Proof. - cbv [modulo]; intros. break_match; try reflexivity. + cbv [modulo_cps Z.eqb_cps]; intros. break_match; try reflexivity. rewrite Z.land_ones by apply Z.log2_nonneg. - congruence. + Z.ltb_to_lt; congruence. Qed. + Definition div_correct a b : div a b = Z.div a b := div_cps_correct a b id. + Definition modulo_correct a b : modulo a b = Z.modulo a b := modulo_cps_correct a b id. + Lemma div_mod a b (H:b <> 0) : a = b * div a b + modulo a b. Proof. - cbv [div modulo]; intros. break_match; auto using Z.div_mod. - rewrite Z.land_ones, Z.shiftr_div_pow2 by apply Z.log2_nonneg. - pose proof (Z.div_mod a b H). congruence. + rewrite div_correct, modulo_correct; auto using Z.div_mod. Qed. End DivMod. +Hint Opaque div modulo : uncps. +Hint Rewrite @div_id @modulo_id : uncps. + Import B. Create HintDb basesystem_partial_evaluation_unfolder. @@ -1045,7 +1068,7 @@ Hint Unfold Positional.eval_from Positional.select_cps Positional.select - modulo div + modulo div modulo_cps div_cps id_tuple_with_alt id_tuple'_with_alt Z.add_get_carry_full Z.add_get_carry_full_cps : basesystem_partial_evaluation_unfolder. @@ -1055,7 +1078,7 @@ Hint Unfold CPSUtil.Tuple.mapi_with_cps CPSUtil.Tuple.mapi_with'_cps CPSUtil.flat_map_cps CPSUtil.on_tuple_cps CPSUtil.fold_right_cps2 Decidable.dec Decidable.dec_eq_Z id_tuple_with_alt id_tuple'_with_alt - Z.add_get_carry_full Z.add_get_carry_full_cps Z.mul_split Z.mul_split_cps + Z.add_get_carry_full Z.add_get_carry_full_cps Z.mul_split Z.mul_split_cps Z.mul_split_cps' : basesystem_partial_evaluation_unfolder. -- cgit v1.2.3