aboutsummaryrefslogtreecommitdiff
path: root/src/Fancy
diff options
context:
space:
mode:
authorGravatar jadep <jade.philipoom@gmail.com>2019-02-07 19:11:16 -0500
committerGravatar jadep <jade.philipoom@gmail.com>2019-02-07 19:11:16 -0500
commit59d5e5fd6a225e731e5149c1afcf5f26051d02d8 (patch)
tree3aa91fb93245fd6946eb18dab6d68255cf4dcd2f /src/Fancy
parentce583d76bdb8f15148fc4222d8bdec096547682a (diff)
Fix instruction-order admit; still neads cleanup
Diffstat (limited to 'src/Fancy')
-rw-r--r--src/Fancy/Barrett256.v78
-rw-r--r--src/Fancy/Prod.v220
2 files changed, 267 insertions, 31 deletions
diff --git a/src/Fancy/Barrett256.v b/src/Fancy/Barrett256.v
index 46e8f6f52..5bd2df9e7 100644
--- a/src/Fancy/Barrett256.v
+++ b/src/Fancy/Barrett256.v
@@ -232,6 +232,7 @@ Module Barrett256.
0 <= start_context x < 2^machine_wordsize ->
0 <= start_context xHigh < 2^machine_wordsize ->
0 <= start_context RegMuLow < 2^machine_wordsize ->
+ 0 <= start_context RegZero < 2^machine_wordsize ->
interp
(barrett_red256_alloc r0 r1 r30 errorP errorR) cc_start_state
(fun r => if reg_eqb r r0
@@ -256,12 +257,81 @@ Module Barrett256.
| H : ~ False |- _ => clear H
end.
- step.
- step.
+ repeat step_lhs.
- (* TODO: To prove equivalence between these two, we need to either relocate the RSHI instructions so they're in the same places or use instruction commutativity to push them down. *)
- Admitted.
+ repeat step_rhs.
+ rewrite interp_Mul256x256 with (tmp2 := extra_reg) by
+ (try congruence; push_value_unused).
+ repeat step_rhs.
+ rewrite mulhh_comm.
+ repeat step_rhs.
+ rewrite mulll_comm.
+ repeat step_rhs.
+
+ rewrite swap_add_chain
+ by
+ repeat match goal with
+ | |- _ <> _ => congruence
+ | _ => rewrite reg_eqb_refl
+ | _ => rewrite reg_eqb_neq by congruence
+ | H : ?y = _ mod ?m |- 0 <= ?y < ?m =>
+ rewrite H; apply Z.mod_pos_bound; omega
+ | |- _ = 2 ^ 256 => reflexivity
+ | |- flags_unused _ _ =>
+ cbv [flags_unused]; intros;
+ do 2 step; reflexivity
+ end.
+
+ Local Ltac simplify :=
+ repeat match goal with
+ | _ => rewrite reg_eqb_refl
+ | _ => rewrite reg_eqb_neq by congruence
+ | H : ?y = _ mod ?m |- 0 <= ?y < _ =>
+ rewrite H; apply Z.mod_pos_bound; omega
+ | _ => assumption
+ end.
+
+ repeat step_rhs.
+
+ assert (0 < 2 ^ 256) by ZeroBounds.Z.zero_bounds.
+ rewrite add_comm by simplify.
+ step_rhs.
+ rewrite addc_comm by simplify.
+ step_rhs.
+ rewrite add_comm by simplify.
+ step_rhs.
+ rewrite addc_comm by simplify.
+ step_rhs.
+ repeat step_rhs.
+ rewrite interp_Mul256x256 with (tmp2 := extra_reg) by
+ (try congruence; push_value_unused).
+ repeat step_rhs.
+ rewrite mulhh_comm by simplify.
+ repeat step_rhs.
+ rewrite mulll_comm by simplify.
+ repeat step_rhs.
+
+ rewrite swap_add_chain
+ by
+ repeat match goal with
+ | |- _ <> _ => congruence
+ | _ => rewrite reg_eqb_refl
+ | _ => rewrite reg_eqb_neq by congruence
+ | H : ?y = _ mod ?m |- 0 <= ?y < ?m =>
+ rewrite H; apply Z.mod_pos_bound; omega
+ | |- _ = 2 ^ 256 => reflexivity
+ | |- flags_unused _ _ =>
+ cbv [flags_unused]; intros;
+ repeat (step; try reflexivity)
+ end.
+
+ repeat step_rhs.
+
+ cbv [Spec.interp].
+ rewrite !reg_eqb_refl.
+ reflexivity.
+ Qed.
Lemma prod_barrett_red256_correct :
forall (cc_start_state : CC.state) (* starting carry flags *)
diff --git a/src/Fancy/Prod.v b/src/Fancy/Prod.v
index d18b3ee77..1fd35aa79 100644
--- a/src/Fancy/Prod.v
+++ b/src/Fancy/Prod.v
@@ -1,10 +1,33 @@
Require Import Coq.ZArith.ZArith.
Require Import Coq.micromega.Lia.
Require Import Coq.Lists.List. Import ListNotations.
-Require Import Crypto.Fancy.Spec.
+Require Import Crypto.Algebra.Ring. (* for ring_simplify_subterms *)
+Require Import Crypto.Fancy.Spec. Import Spec.Registers.
Require Import Crypto.Fancy.Compiler.
Require Import Crypto.Util.Tactics.BreakMatch.
-Import Spec.Registers.
+Require Import Crypto.Util.ZUtil.Tactics.PullPush.Modulo.
+Require Import Crypto.Util.ZUtil.Tactics.ZeroBounds.
+Local Open Scope Z_scope.
+
+Module Z.
+(* TODO: move to Div *)
+Lemma div_add_mod_cond_l' x y d :
+ d <> 0 ->
+ (x mod d + y) / d = (x + y) / d - x / d.
+Proof.
+ intros.
+ rewrite (Div.Z.div_add_mod_cond_l x y d) by omega.
+ ring.
+Qed.
+Lemma div_add_mod_cond_r' x y d :
+ d <> 0 ->
+ (x + y mod d) / d = (x + y) / d - y / d.
+Proof.
+ intros.
+ rewrite (Div.Z.div_add_mod_cond_r x y d) by omega.
+ ring.
+Qed.
+End Z.
Section Prod.
Definition Mul256 (out src1 src2 tmp : register) (cont : expr) : expr :=
@@ -62,6 +85,32 @@ Section Prod.
(Ret x))))))))))))))).
End Prod.
+Ltac cleanup :=
+ cbn - [interp spec cc_spec];
+ rewrite ?reg_eqb_refl, ?reg_eqb_neq by congruence;
+ cbv [CC.update in_dec list_rec list_rect
+ CC.code_dec];
+ cbn [CC.cc_c CC.cc_m CC.cc_z CC.cc_l].
+Ltac step_and_remember :=
+ rewrite interp_step; cleanup;
+ remember_single_result.
+Ltac step_lhs :=
+ match goal with
+ | |- Spec.interp _ _ _ (Instr _ _ _ _) _ _ = _ =>
+ step_and_remember
+ end.
+Ltac step_rhs :=
+ rewrite interp_step; cleanup;
+ match goal with
+ | H: ?x = spec ?i ?args _
+ |- context [spec ?i ?args ?cc] =>
+ replace (spec i args cc) with x by idtac
+ end;
+ match goal with
+ | H : ?y = (?x mod ?m)%Z |- context [(?x mod ?m)%Z] =>
+ rewrite <-H
+ end.
+
Section ProdEquiv.
Context (wordmax : Z).
@@ -138,17 +187,13 @@ Section ProdEquiv.
(Instr (ADD 128) out (out, tmp) cont))))) cc ctx.
Proof.
intros; cbv [Mul256 interp256].
- repeat
- (rewrite interp_step; cbn - [interp spec cc_spec];
- rewrite ?reg_eqb_refl, ?reg_eqb_neq by congruence;
- remember_single_result;
- cbn [spec MUL128LL MUL128LU MUL128UL ADD] in *).
+ repeat step_lhs.
+ repeat step_rhs.
match goal with H : value_unused tmp _ |- _ => erewrite H end.
match goal with H : value_unused tmp2 _ |- _ => erewrite H end.
apply interp_state_equiv.
- { rewrite !cc_overwrite_full.
- f_equal; subst; lia. }
+ { reflexivity. }
{ intros; cbv [reg_eqb].
break_innermost_match; try congruence; reflexivity. }
Qed.
@@ -173,41 +218,162 @@ Section ProdEquiv.
value_unused tmp2 cont ->
interp256 (Mul256x256 out outHigh src1 src2 tmp cont) cc ctx =
interp256 (
- Instr MUL128LL out (src1, src2)
+ Instr MUL128UU outHigh (src1, src2)
(Instr MUL128LU tmp (src1, src2)
(Instr MUL128UL tmp2 (src1, src2)
- (Instr MUL128UU outHigh (src1, src2)
+ (Instr MUL128LL out (src1, src2)
(Instr (ADD 128) out (out, tmp2)
(Instr (ADDC (-128)) outHigh (outHigh, tmp2)
(Instr (ADD 128) out (out, tmp)
(Instr (ADDC (-128)) outHigh (outHigh, tmp) cont)))))))) cc ctx.
Proof.
intros; cbv [Mul256x256 interp256].
- repeat
- (rewrite interp_step; cbn - [interp spec cc_spec];
- rewrite ?reg_eqb_refl, ?reg_eqb_neq by congruence;
- remember_single_result;
- cbn [spec MUL128LL MUL128LU MUL128UL MUL128UU ADD ADDC] in *).
+ repeat step_lhs.
+ repeat step_rhs.
match goal with H : value_unused tmp _ |- _ => erewrite H end.
match goal with H : value_unused tmp2 _ |- _ => erewrite H end.
- apply interp_state_equiv.
- { rewrite !cc_overwrite_full.
- f_equal.
- subst. cbn - [Z.add Z.modulo Z.testbit Z.mul Z.shiftl lower128 upper128].
- lia. }
- { intros; cbv [reg_eqb].
- break_innermost_match; try congruence; try reflexivity; [ ].
- subst. cbn - [Z.add Z.modulo Z.testbit Z.mul Z.shiftl lower128 upper128].
- lia. }
+ apply interp_state_equiv; [ reflexivity | ].
+ intros; cbv [reg_eqb].
+ break_innermost_match; try congruence; reflexivity.
+ Qed.
+
+ Lemma interp_add_chain a b c cont cc ctx:
+ a <> b ->
+ a <> c ->
+ b <> c ->
+ (0 <= ctx a < wordmax)%Z ->
+ (0 <= ctx b < wordmax)%Z ->
+ (0 <= ctx c < wordmax)%Z ->
+ (wordmax = 2 ^ 256)%Z ->
+ let result := (ctx a + Z.shiftl (ctx c) 128 + ctx b * wordmax)%Z in
+ interp256 (
+ Instr (ADD 128) a (a, c)
+ (Instr (ADDC (-128)) b (b, c) cont))
+ cc ctx =
+ interp256 cont
+ (CC.update [CC.C; CC.M; CC.L; CC.Z]
+ (result / wordmax) cc_spec cc)
+ (fun r =>
+ if reg_eqb r a
+ then (result mod wordmax)%Z
+ else
+ if reg_eqb r b
+ then ((result / wordmax) mod wordmax)%Z
+ else ctx r).
+ Proof.
+ intros; cbv [interp256].
+ repeat step_and_remember.
+ cbn [spec ADD ADDC CC.cc_c] in *.
+ replace x0 with (result / wordmax)%Z in *.
+
+ (* TODO: this is a stupidly ugly arithmetic proof *)
+ 2 : { subst. subst result.
+ rewrite Z.shiftl_mul_pow2 by omega.
+ rewrite Z.shiftl_div_pow2 by omega.
+ rewrite Pos2Z.opp_neg. (* TODO : add to zsimplify? *)
+ autorewrite with zsimplify.
+
+ match goal with
+ |- context [if ?x then 1 else 0] =>
+ change (if x then 1 else 0) with (Z.b2z x)
+ end.
+ cbv [cc_spec].
+ rewrite Z.testbit_spec' by omega.
+
+ rewrite Z.mod_small with (b:=2).
+ 2 : { split; [ Z.zero_bounds | ].
+ apply Z.div_lt_upper_bound; try lia.
+ match goal with
+ |- context [ ?x mod ?y ] =>
+ pose proof (Z.mod_pos_bound x y ltac:(lia))
+ end.
+ lia. }
+
+ autorewrite with zsimplify.
+
+ rewrite Z.div_add_mod_cond_r' by omega.
+ rewrite Z.mod_small with (a := ctx c / 2 ^ 128)
+ by (split; [ Z.zero_bounds | apply Z.div_lt_upper_bound; lia ]).
+ assert (0 < 2 ^ 128) by (cbn; omega).
+ change (2 ^ 256)%Z with (2 ^ 128 * 2 ^ 128)%Z.
+ rewrite Z.div_mul_cancel_r by omega.
+ ring. }
+
+ apply interp_state_equiv; [ reflexivity | ].
+ intros; cbv [reg_eqb].
+ break_innermost_match; try congruence; try reflexivity.
+ { subst. subst result.
+ autorewrite with zsimplify.
+ pull_Zmod. reflexivity. }
+ Qed.
+
+ Definition flags_unused e wordmax : Prop :=
+ forall cc x ctx,
+ interp reg_eqb wordmax cc_spec e cc ctx =
+ interp reg_eqb wordmax cc_spec e (CC.update [CC.C; CC.M; CC.L; CC.Z] x cc_spec cc) ctx.
+
+ Lemma swap_add_chain a b c d cont cc ctx:
+ a <> b ->
+ a <> c ->
+ a <> d ->
+ b <> c ->
+ b <> d ->
+ c <> d ->
+ 0 <= ctx a < wordmax ->
+ 0 <= ctx b < wordmax ->
+ 0 <= ctx c < wordmax ->
+ 0 <= ctx d < wordmax ->
+ wordmax = 2 ^ 256 ->
+ flags_unused cont wordmax ->
+ interp256 (
+ Instr (ADD 128) a (a, c)
+ (Instr (ADDC (-128)) b (b, c)
+ (Instr (ADD 128) a (a, d)
+ (Instr (ADDC (-128)) b (b, d) cont))))
+ cc ctx =
+ interp256 (
+ Instr (ADD 128) a (a, d)
+ (Instr (ADDC (-128)) b (b, d)
+ (Instr (ADD 128) a (a, c)
+ (Instr (ADDC (-128)) b (b, c) cont))))
+ cc ctx.
+ Proof.
+ intros.
+ assert (0 < wordmax) by omega.
+ repeat (rewrite interp_add_chain by (rewrite ?reg_eqb_refl, ?reg_eqb_neq by congruence; try assumption; auto using Z.mod_pos_bound with omega)).
+ rewrite ?reg_eqb_refl, ?reg_eqb_neq by congruence.
+ match goal with
+ | H : flags_unused ?e _
+ |- interp256 ?e ?ccl _ = interp256 ?e ?ccr _ =>
+ rewrite H with (cc:=ccl) (x:=0);
+ rewrite H with (cc:=ccr) (x:=0)
+ end.
+ rewrite !cc_overwrite_full.
+ apply interp_state_equiv; [ reflexivity | ].
+ intros; cbv [reg_eqb].
+ break_innermost_match; try congruence; try reflexivity.
+ { autorewrite with zsimplify. pull_Zmod.
+ f_equal; ring. }
+ { autorewrite with zsimplify. pull_Zmod.
+ rewrite !Z.div_add_mod_cond_l' by (subst; cbn; omega).
+ ring_simplify.
+ (* This is really annoying, can't a tactic do this? *)
+ match goal with |- context [?a + ?b + ?c] =>
+ match goal with |- context [?a + ?c + ?b] =>
+ replace (a + b + c) with (a + c + b) by ring
+ end end.
+ f_equal. ring. }
Qed.
End ProdEquiv.
Ltac push_value_unused :=
repeat match goal with
- | |- ~ In _ _ => cbn; intuition; congruence
+ | _ => apply value_unused_skip; [ | assumption | ]
| _ => apply value_unused_overwrite
- | _ => apply value_unused_skip; [ | congruence | ]
| _ => apply value_unused_ret; congruence
+ | _ => apply not_in_cons; split;
+ [ try assumption; symmetry; assumption | ]
+ | _ => apply in_nil
end. \ No newline at end of file