diff options
-rw-r--r-- | .depend | 10 | ||||
-rw-r--r-- | Changelog | 1 | ||||
-rw-r--r-- | arm/Op.v | 31 | ||||
-rw-r--r-- | backend/CSE.v | 104 | ||||
-rw-r--r-- | backend/CSEproof.v | 477 | ||||
-rw-r--r-- | driver/Compiler.v | 4 | ||||
-rw-r--r-- | ia32/Op.v | 55 | ||||
-rw-r--r-- | lib/Integers.v | 50 | ||||
-rw-r--r-- | powerpc/Op.v | 55 |
9 files changed, 556 insertions, 231 deletions
@@ -42,8 +42,8 @@ $(ARCH)/ConstpropOp.vo $(ARCH)/ConstpropOp.glob: $(ARCH)/ConstpropOp.v lib/Coqli backend/Constprop.vo backend/Constprop.glob: backend/Constprop.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo $(ARCH)/ConstpropOp.vo $(ARCH)/ConstpropOpproof.vo $(ARCH)/ConstpropOpproof.glob: $(ARCH)/ConstpropOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo backend/Constpropproof.vo backend/Constpropproof.glob: backend/Constpropproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo $(ARCH)/ConstpropOpproof.vo -backend/CSE.vo backend/CSE.glob: backend/CSE.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo -backend/CSEproof.vo backend/CSEproof.glob: backend/CSEproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo backend/CSE.vo +backend/CSE.vo backend/CSE.glob: backend/CSE.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo +backend/CSEproof.vo backend/CSEproof.glob: backend/CSEproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo backend/CSE.vo $(ARCH)/Machregs.vo $(ARCH)/Machregs.glob: $(ARCH)/Machregs.v lib/Coqlib.vo lib/Maps.vo common/AST.vo backend/Locations.vo backend/Locations.glob: backend/Locations.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Values.vo $(ARCH)/Machregs.vo $(ARCH)/$(VARIANT)/Conventions1.vo $(ARCH)/$(VARIANT)/Conventions1.glob: $(ARCH)/$(VARIANT)/Conventions1.v lib/Coqlib.vo common/AST.vo backend/Locations.vo @@ -86,9 +86,9 @@ backend/Stackingtyping.vo backend/Stackingtyping.glob: backend/Stackingtyping.v backend/Machsem.vo backend/Machsem.glob: backend/Machsem.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Mach.vo $(ARCH)/$(VARIANT)/Stacklayout.vo $(ARCH)/Asmgenretaddr.vo $(ARCH)/Asm.vo $(ARCH)/Asm.glob: $(ARCH)/Asm.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Conventions.vo $(ARCH)/Asmgen.vo $(ARCH)/Asmgen.glob: $(ARCH)/Asmgen.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo -$(ARCH)/Asmgenretaddr.vo $(ARCH)/Asmgenretaddr.glob: $(ARCH)/Asmgenretaddr.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo -$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Machsem.vo backend/Machtyping.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo -$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob: $(ARCH)/Asmgenproof.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Machsem.vo backend/Machtyping.vo backend/Conventions.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo $(ARCH)/Asmgenretaddr.vo $(ARCH)/Asmgenproof1.vo +$(ARCH)/Asmgenretaddr.vo $(ARCH)/Asmgenretaddr.glob: $(ARCH)/Asmgenretaddr.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo +$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Machsem.vo backend/Machtyping.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo +$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob: $(ARCH)/Asmgenproof.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Mach.vo backend/Machsem.vo backend/Machtyping.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo $(ARCH)/Asmgenretaddr.vo $(ARCH)/Asmgenproof1.vo cfrontend/Csyntax.vo cfrontend/Csyntax.glob: cfrontend/Csyntax.v lib/Coqlib.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo cfrontend/Csem.vo cfrontend/Csem.glob: cfrontend/Csem.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo common/Memory.vo common/Events.vo common/Globalenvs.vo cfrontend/Csyntax.vo common/Smallstep.vo cfrontend/Cstrategy.vo cfrontend/Cstrategy.glob: cfrontend/Cstrategy.v lib/Axioms.vo lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo @@ -20,6 +20,7 @@ Performance improvements: - Shorter, more efficient code generated for accessing volatile global variables. - Better code generated for && and || outside conditional tests. +- More aggressive common subexpression elimination (CSE) of memory loads. - Improved register allocation for invocations of built-ins, especially for annotations. - In Cminor and down, make safe operators non-strict: they return Vundef @@ -610,6 +610,37 @@ Proof. intros. destruct c; simpl; auto; congruence. Qed. +(** Checking whether two addressings, applied to the same arguments, produce + separated memory addresses. Used in [CSE]. *) + +Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing) + (chunk2: memory_chunk) (addr2: addressing) : bool := + match addr1, addr2 with + | Aindexed ofs1, Aindexed ofs2 => + Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) + | Ainstack ofs1, Ainstack ofs2 => + Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) + | _, _ => false + end. + +Lemma addressing_separated_sound: + forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2, + addressing_separated chunk1 addr1 chunk2 addr2 = true -> + eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) -> + eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) -> + b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1. +Proof. + unfold addressing_separated; intros. + generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2. + destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv. +(* Aindexed *) + destruct v; simpl in *; inv H1; inv H2. + right. apply Int.no_overlap_sound; auto. +(* Ainstack *) + destruct sp; simpl in *; inv H1; inv H2. + right. apply Int.no_overlap_sound; auto. +Qed. + (** * Invariance and compatibility properties. *) (** [eval_operation] and [eval_addressing] depend on a global environment diff --git a/backend/CSE.v b/backend/CSE.v index 44ed590..ba8dec8 100644 --- a/backend/CSE.v +++ b/backend/CSE.v @@ -15,6 +15,7 @@ Require Import Coqlib. Require Import Maps. +Require Import Errors. Require Import AST. Require Import Integers. Require Import Floats. @@ -24,6 +25,7 @@ Require Import Globalenvs. Require Import Op. Require Import Registers. Require Import RTL. +Require Import RTLtyping. Require Import Kildall. (** * Value numbering *) @@ -188,26 +190,57 @@ Definition add_unknown (n: numbering) (rd: reg) := n.(num_eqs) (PTree.set rd n.(num_next) n.(num_reg)). -(** [kill_load n] removes all equations involving memory loads, - as well as those involving memory-dependent operators. - It is used to reflect the effect of a memory store, which can - potentially invalidate all such equations. *) +(** [kill_equations pred n] remove all equations satisfying predicate [pred]. *) -Fixpoint kill_load_eqs (eqs: list (valnum * rhs)) : list (valnum * rhs) := +Fixpoint kill_eqs (pred: rhs -> bool) (eqs: list (valnum * rhs)) : list (valnum * rhs) := match eqs with | nil => nil - | eq :: rem => - match eq with - | (_, Op op _) => if op_depends_on_memory op then kill_load_eqs rem else eq :: kill_load_eqs rem - | (_, Load _ _ _) => kill_load_eqs rem - end + | eq :: rem => if pred (snd eq) then kill_eqs pred rem else eq :: kill_eqs pred rem end. -Definition kill_loads (n: numbering) : numbering := +Definition kill_equations (pred: rhs -> bool) (n: numbering) : numbering := mknumbering n.(num_next) - (kill_load_eqs n.(num_eqs)) + (kill_eqs pred n.(num_eqs)) n.(num_reg). +(** [kill_loads n] removes all equations involving memory loads, + as well as those involving memory-dependent operators. + It is used to reflect the effect of a builtin operation, which can + change memory in unpredictable ways and potentially invalidate all such equations. *) + +Definition filter_loads (r: rhs) : bool := + match r with + | Op op _ => op_depends_on_memory op + | Load _ _ _ => true + end. + +Definition kill_loads (n: numbering) : numbering := + kill_equations filter_loads n. + +(** [add_store n chunk addr rargs rsrc] updates the numbering [n] to reflect + the effect of a store instruction [Istore chunk addr rargs rsrc]. + Equations involving the memory state are removed from [n], unless we + can prove that the store does not invalidate them. + Then, an equations [rsrc = Load chunk addr rargs] is added to reflect + the known content of the stored memory area, but only if [chunk] is + a "full-size" quantity ([Mint32] or [Mfloat64]). *) + +Definition filter_after_store (chunk: memory_chunk) (addr: addressing) (vl: list valnum) (r: rhs) : bool := + match r with + | Op op vl' => op_depends_on_memory op + | Load chunk' addr' vl' => + negb(eq_list_valnum vl vl' && addressing_separated chunk addr chunk' addr') + end. + +Definition add_store (n: numbering) (chunk: memory_chunk) (addr: addressing) + (rargs: list reg) (rsrc: reg) : numbering := + let (n1, vargs) := valnum_regs n rargs in + let n2 := kill_equations (filter_after_store chunk addr vargs) n1 in + match chunk with + | Mint32 | Mfloat64 => add_rhs n2 rsrc (Load chunk addr vargs) + | _ => n2 + end. + (* [reg_valnum n vn] returns a register that is mapped to value number [vn], or [None] if no such register exists. *) @@ -339,7 +372,7 @@ Definition transfer (f: function) (pc: node) (before: numbering) := | Iload chunk addr args dst s => add_load before dst chunk addr args | Istore chunk addr args src s => - kill_loads before + add_store before chunk addr args src | Icall sig ros args res s => empty_numbering | Itailcall sig ros args => @@ -358,16 +391,10 @@ Definition transfer (f: function) (pc: node) (before: numbering) := (** The static analysis solves the dataflow inequations implied by the [transfer] function using the ``extended basic block'' solver, which produces sub-optimal solutions quickly. The result is - a mapping from program points to numberings. In the unlikely - case where the solver fails to find a solution, we simply associate - empty numberings to all program points, which is semantically correct - and effectively deactivates the CSE optimization. *) - -Definition analyze (f: RTL.function): PMap.t numbering := - match Solver.fixpoint (successors f) (transfer f) f.(fn_entrypoint) with - | None => PMap.init empty_numbering - | Some res => res - end. + a mapping from program points to numberings. *) + +Definition analyze (f: RTL.function): option (PMap.t numbering) := + Solver.fixpoint (successors f) (transfer f) f.(fn_entrypoint). (** * Code transformation *) @@ -400,18 +427,25 @@ Definition transf_instr (n: numbering) (instr: instruction) := Definition transf_code (approxs: PMap.t numbering) (instrs: code) : code := PTree.map (fun pc instr => transf_instr approxs!!pc instr) instrs. -Definition transf_function (f: function) : function := - let approxs := analyze f in - mkfunction - f.(fn_sig) - f.(fn_params) - f.(fn_stacksize) - (transf_code approxs f.(fn_code)) - f.(fn_entrypoint). +Definition transf_function (f: function) : res function := + match type_function f with + | Error msg => Error msg + | OK tyenv => + match analyze f with + | None => Error (msg "CSE failure") + | Some approxs => + OK(mkfunction + f.(fn_sig) + f.(fn_params) + f.(fn_stacksize) + (transf_code approxs f.(fn_code)) + f.(fn_entrypoint)) + end + end. -Definition transf_fundef (f: fundef) : fundef := - AST.transf_fundef transf_function f. +Definition transf_fundef (f: fundef) : res fundef := + AST.transf_partial_fundef transf_function f. -Definition transf_program (p: program) : program := - transform_program transf_fundef p. +Definition transf_program (p: program) : res program := + transform_partial_program transf_fundef p. diff --git a/backend/CSEproof.v b/backend/CSEproof.v index c685ef6..6543826 100644 --- a/backend/CSEproof.v +++ b/backend/CSEproof.v @@ -15,6 +15,7 @@ Require Import Coqlib. Require Import Maps. Require Import AST. +Require Import Errors. Require Import Integers. Require Import Floats. Require Import Values. @@ -25,6 +26,7 @@ Require Import Smallstep. Require Import Op. Require Import Registers. Require Import RTL. +Require Import RTLtyping. Require Import Kildall. Require Import CSE. @@ -203,25 +205,37 @@ Proof. apply Plt_trans_succ. eauto. Qed. -Lemma kill_load_eqs_incl: - forall eqs, List.incl (kill_load_eqs eqs) eqs. +Remark kill_eqs_in: + forall pred v rhs eqs, + In (v, rhs) (kill_eqs pred eqs) -> In (v, rhs) eqs /\ pred rhs = false. Proof. - induction eqs; simpl; intros. - apply incl_refl. - destruct a. destruct r. - destruct (op_depends_on_memory o). auto with coqlib. - apply incl_same_head; auto. - auto with coqlib. + induction eqs; simpl; intros. + tauto. + destruct (pred (snd a)) as []_eqn. + exploit IHeqs; eauto. tauto. + simpl in H; destruct H. subst a. auto. exploit IHeqs; eauto. tauto. Qed. -Lemma wf_kill_loads: - forall n, wf_numbering n -> wf_numbering (kill_loads n). +Lemma wf_kill_equations: + forall pred n, wf_numbering n -> wf_numbering (kill_equations pred n). Proof. - intros. inversion H. unfold kill_loads; split; simpl; intros. - apply H0. apply kill_load_eqs_incl. auto. + intros. inversion H. unfold kill_equations; split; simpl; intros. + exploit kill_eqs_in; eauto. intros [A B]. auto. eauto. Qed. +Lemma wf_add_store: + forall n chunk addr rargs rsrc, + wf_numbering n -> wf_numbering (add_store n chunk addr rargs rsrc). +Proof. + intros. unfold add_store. + destruct (valnum_regs n rargs) as [n1 vargs]_eqn. + exploit wf_valnum_regs; eauto. intros [A [B C]]. + assert (wf_numbering (kill_equations (filter_after_store chunk addr vargs) n1)). + apply wf_kill_equations. auto. + destruct chunk; auto; apply wf_add_rhs; auto. +Qed. + Lemma wf_transfer: forall f pc n, wf_numbering n -> wf_numbering (transfer f pc n). Proof. @@ -230,25 +244,22 @@ Proof. destruct i; auto. apply wf_add_op; auto. apply wf_add_load; auto. - apply wf_kill_loads; auto. + apply wf_add_store; auto. apply wf_empty_numbering. apply wf_empty_numbering. - apply wf_add_unknown. apply wf_kill_loads; auto. + apply wf_add_unknown. apply wf_kill_equations; auto. Qed. (** As a consequence, the numberings computed by the static analysis are well formed. *) Theorem wf_analyze: - forall f pc, wf_numbering (analyze f)!!pc. + forall f approx pc, analyze f = Some approx -> wf_numbering approx!!pc. Proof. unfold analyze; intros. - caseEq (Solver.fixpoint (successors f) (transfer f) (fn_entrypoint f)). - intros approx EQ. eapply Solver.fixpoint_invariant with (P := wf_numbering); eauto. exact wf_empty_numbering. exact (wf_transfer f). - intro. rewrite PMap.gi. apply wf_empty_numbering. Qed. (** ** Properties of satisfiability of numberings *) @@ -264,7 +275,6 @@ Section SATISFIABILITY. Variable ge: genv. Variable sp: val. -Variable m: mem. (** Agremment between two mappings from value numbers to values up to a given value number. *) @@ -303,7 +313,7 @@ Qed. extensional with respect to [valu_agree]. *) Lemma numbering_holds_exten: - forall valu1 valu2 n rs, + forall valu1 valu2 n rs m, valu_agree valu1 valu2 n.(num_next) -> wf_numbering n -> numbering_holds valu1 ge sp rs m n -> @@ -328,7 +338,7 @@ Qed. value numbers. *) Lemma valnum_reg_holds: - forall valu1 rs n r n' v, + forall valu1 rs n r n' v m, wf_numbering n -> numbering_holds valu1 ge sp rs m n -> valnum_reg n r = (n', v) -> @@ -351,22 +361,21 @@ Proof. assert (AG: valu_agree valu1 valu2 n.(num_next)). red; intros. unfold valu2. apply VMap.gso. auto with coqlib. - elim (numbering_holds_exten _ _ _ _ AG H0 H1); intros. + destruct (numbering_holds_exten _ _ _ _ _ AG H0 H1) as [A B]. exists valu2. split. split; simpl; intros. auto. unfold valu2, VMap.set, ValnumEq.eq. - rewrite PTree.gsspec in H7. destruct (peq r0 r). - inversion H7. rewrite peq_true. congruence. - case (peq vn (num_next n)); intro. - inversion H0. generalize (H9 _ _ H7). rewrite e. intro. - elim (Plt_strict _ H10). - auto. - split. unfold valu2. apply VMap.gss. + rewrite PTree.gsspec in H5. destruct (peq r0 r). + inv H5. rewrite peq_true. auto. + rewrite peq_false. auto. + assert (Plt vn (num_next n)). inv H0. eauto. + red; intros; subst; eapply Plt_strict; eauto. + split. unfold valu2. rewrite VMap.gss. auto. auto. Qed. Lemma valnum_regs_holds: - forall rs rl valu1 n n' vl, + forall rs rl valu1 n n' vl m, wf_numbering n -> numbering_holds valu1 ge sp rs m n -> valnum_regs n rl = (n', vl) -> @@ -378,15 +387,15 @@ Proof. induction rl; simpl; intros. (* base case *) inversion H1; subst n'; subst vl. - exists valu1. split. auto. split. reflexivity. apply valu_agree_refl. + exists valu1. split. auto. split. auto. apply valu_agree_refl. (* inductive case *) caseEq (valnum_reg n a); intros n1 v1 EQ1. caseEq (valnum_regs n1 rl); intros ns vs EQs. rewrite EQ1 in H1; rewrite EQs in H1. inversion H1. subst vl; subst n'. - generalize (valnum_reg_holds _ _ _ _ _ _ H H0 EQ1). + generalize (valnum_reg_holds _ _ _ _ _ _ _ H H0 EQ1). intros [valu2 [A [B C]]]. generalize (wf_valnum_reg _ _ _ _ H EQ1). intros [D [E F]]. - generalize (IHrl _ _ _ _ D A EQs). + generalize (IHrl _ _ _ _ _ D A EQs). intros [valu3 [P [Q R]]]. exists valu3. split. auto. @@ -398,7 +407,7 @@ Qed. of the value to which a right-hand side of an equation evaluates. *) Definition rhs_evals_to - (valu: valnum -> val) (rh: rhs) (v: val) : Prop := + (valu: valnum -> val) (rh: rhs) (m: mem) (v: val) : Prop := match rh with | Op op vl => eval_operation ge sp op (List.map valu vl) m = Some v @@ -409,23 +418,23 @@ Definition rhs_evals_to end. Lemma equation_evals_to_holds_1: - forall valu rh v vres, - rhs_evals_to valu rh v -> + forall valu rh v vres m, + rhs_evals_to valu rh m v -> equation_holds valu ge sp m vres rh -> valu vres = v. Proof. - intros until vres. unfold rhs_evals_to, equation_holds. + intros until m. unfold rhs_evals_to, equation_holds. destruct rh. congruence. intros [a1 [A1 B1]] [a2 [A2 B2]]. congruence. Qed. Lemma equation_evals_to_holds_2: - forall valu rh v vres, + forall valu rh v vres m, wf_rhs vres rh -> - rhs_evals_to valu rh v -> + rhs_evals_to valu rh m v -> equation_holds (VMap.set vres v valu) ge sp m vres rh. Proof. - intros until vres. unfold wf_rhs, rhs_evals_to, equation_holds. + intros until m. unfold wf_rhs, rhs_evals_to, equation_holds. rewrite VMap.gss. assert (forall vl: list valnum, (forall v, In v vl -> Plt v vres) -> @@ -438,13 +447,14 @@ Qed. (** The numbering obtained by adding an equation [rd = rhs] is satisfiable in a concrete register set where [rd] is set to the value of [rhs]. *) -Lemma add_rhs_satisfiable: - forall n rh valu1 rs rd v, +Lemma add_rhs_satisfiable_gen: + forall n rh valu1 rs rd rs' m, wf_numbering n -> wf_rhs n.(num_next) rh -> numbering_holds valu1 ge sp rs m n -> - rhs_evals_to valu1 rh v -> - numbering_satisfiable ge sp (rs#rd <- v) m (add_rhs n rd rh). + rhs_evals_to valu1 rh m (rs'#rd) -> + (forall r, r <> rd -> rs'#r = rs#r) -> + numbering_satisfiable ge sp rs' m (add_rhs n rd rh). Proof. intros. unfold add_rhs. caseEq (find_valnum_rhs rh n.(num_eqs)). @@ -452,34 +462,45 @@ Proof. intros vres FINDVN. inversion H1. exists valu1. split; simpl; intros. auto. - rewrite Regmap.gsspec. - rewrite PTree.gsspec in H5. + rewrite PTree.gsspec in H6. destruct (peq r rd). + inv H6. symmetry. eapply equation_evals_to_holds_1; eauto. - apply H3. apply find_valnum_rhs_correct. congruence. - auto. + apply H4. apply find_valnum_rhs_correct. congruence. + rewrite H3; auto. (* RHS not found *) intro FINDVN. - set (valu2 := VMap.set n.(num_next) v valu1). + set (valu2 := VMap.set n.(num_next) (rs'#rd) valu1). assert (AG: valu_agree valu1 valu2 n.(num_next)). red; intros. unfold valu2. apply VMap.gso. auto with coqlib. - elim (numbering_holds_exten _ _ _ _ AG H H1); intros. + elim (numbering_holds_exten _ _ _ _ _ AG H H1); intros. exists valu2. split; simpl; intros. - elim H5; intro. - inversion H6; subst vn; subst rh0. - unfold valu2. eapply equation_evals_to_holds_2; eauto. - auto. - rewrite Regmap.gsspec. rewrite PTree.gsspec in H5. destruct (peq r rd). - unfold valu2. inversion H5. symmetry. apply VMap.gss. - auto. + destruct H6. + inv H6. unfold valu2. eapply equation_evals_to_holds_2; eauto. auto. + rewrite PTree.gsspec in H6. destruct (peq r rd). + unfold valu2. inv H6. rewrite VMap.gss. auto. + rewrite H3; auto. +Qed. + +Lemma add_rhs_satisfiable: + forall n rh valu1 rs rd v m, + wf_numbering n -> + wf_rhs n.(num_next) rh -> + numbering_holds valu1 ge sp rs m n -> + rhs_evals_to valu1 rh m v -> + numbering_satisfiable ge sp (rs#rd <- v) m (add_rhs n rd rh). +Proof. + intros. eapply add_rhs_satisfiable_gen; eauto. + rewrite Regmap.gss; auto. + intros. apply Regmap.gso; auto. Qed. (** [add_op] returns a numbering that is satisfiable in the register set after execution of the corresponding [Iop] instruction. *) Lemma add_op_satisfiable: - forall n rs op args dst v, + forall n rs op args dst v m, wf_numbering n -> numbering_satisfiable ge sp rs m n -> eval_operation ge sp op rs##args m = Some v -> @@ -491,13 +512,13 @@ Proof. intros arg EQ. destruct (is_move_operation_correct _ _ EQ) as [A B]. subst op args. caseEq (valnum_reg n arg). intros n1 v1 VL. - generalize (valnum_reg_holds _ _ _ _ _ _ H H2 VL). intros [valu2 [A [B C]]]. + generalize (valnum_reg_holds _ _ _ _ _ _ _ H H2 VL). intros [valu2 [A [B C]]]. generalize (wf_valnum_reg _ _ _ _ H VL). intros [D [E F]]. elim A; intros. exists valu2; split; simpl; intros. auto. rewrite Regmap.gsspec. rewrite PTree.gsspec in H5. destruct (peq r dst). simpl in H1. congruence. auto. intro NEQ. caseEq (valnum_regs n args). intros n1 vl VRL. - generalize (valnum_regs_holds _ _ _ _ _ _ H H2 VRL). intros [valu2 [A [B C]]]. + generalize (valnum_regs_holds _ _ _ _ _ _ _ H H2 VRL). intros [valu2 [A [B C]]]. generalize (wf_valnum_regs _ _ _ _ H VRL). intros [D [E F]]. apply add_rhs_satisfiable with valu2; auto. simpl. congruence. @@ -507,19 +528,17 @@ Qed. set after execution of the corresponding [Iload] instruction. *) Lemma add_load_satisfiable: - forall n rs chunk addr args dst a v, + forall n rs chunk addr args dst a v m, wf_numbering n -> numbering_satisfiable ge sp rs m n -> eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = Some v -> - numbering_satisfiable ge sp - (rs#dst <- v) - m (add_load n dst chunk addr args). + numbering_satisfiable ge sp (rs#dst <- v) m (add_load n dst chunk addr args). Proof. intros. inversion H0. unfold add_load. caseEq (valnum_regs n args). intros n1 vl VRL. - generalize (valnum_regs_holds _ _ _ _ _ _ H H3 VRL). intros [valu2 [A [B C]]]. + generalize (valnum_regs_holds _ _ _ _ _ _ _ H H3 VRL). intros [valu2 [A [B C]]]. generalize (wf_valnum_regs _ _ _ _ H VRL). intros [D [E F]]. apply add_rhs_satisfiable with valu2; auto. simpl. exists a; split; congruence. @@ -529,11 +548,10 @@ Qed. register set after setting the target register to any value. *) Lemma add_unknown_satisfiable: - forall n rs dst v, + forall n rs dst v m, wf_numbering n -> numbering_satisfiable ge sp rs m n -> - numbering_satisfiable ge sp - (rs#dst <- v) m (add_unknown n dst). + numbering_satisfiable ge sp (rs#dst <- v) m (add_unknown n dst). Proof. intros. destruct H0 as [valu A]. set (valu' := VMap.set n.(num_next) v valu). @@ -548,39 +566,77 @@ Proof. eauto. Qed. -(** [kill_load] preserves satisfiability. Moreover, the resulting numbering - is satisfiable in any concrete memory state. *) +(** Satisfiability of [kill_equations]. *) -Lemma kill_load_eqs_ops: - forall v rhs eqs, - In (v, rhs) (kill_load_eqs eqs) -> - match rhs with - | Op op _ => op_depends_on_memory op = false - | Load _ _ _ => False - end. +Lemma kill_equations_holds: + forall pred valu n rs m m', + (forall v r, + equation_holds valu ge sp m v r -> pred r = false -> equation_holds valu ge sp m' v r) -> + numbering_holds valu ge sp rs m n -> + numbering_holds valu ge sp rs m' (kill_equations pred n). Proof. - induction eqs; simpl; intros. - elim H. - destruct a. destruct r. destruct (op_depends_on_memory o) as [] _eqn. - apply IHeqs; auto. - simpl in H; destruct H. inv H. auto. - apply IHeqs. auto. - apply IHeqs. auto. + intros. destruct H0 as [A B]. red; simpl. split; intros. + exploit kill_eqs_in; eauto. intros [C D]. eauto. + auto. Qed. -Lemma kill_load_satisfiable: - forall n rs m', +(** [kill_loads] preserves satisfiability. Moreover, the resulting numbering + is satisfiable in any concrete memory state. *) + +Lemma kill_loads_satisfiable: + forall n rs m m', numbering_satisfiable ge sp rs m n -> numbering_satisfiable ge sp rs m' (kill_loads n). Proof. - intros. inv H. destruct H0. generalize (kill_load_eqs_incl n.(num_eqs)). intro. - exists x. split; intros. - generalize (H _ _ (H1 _ H2)). - generalize (kill_load_eqs_ops _ _ _ H2). - destruct rh; simpl. - intros. rewrite <- H4. apply op_depends_on_memory_correct; auto. - tauto. - apply H0. auto. + intros. destruct H as [valu A]. exists valu. eapply kill_equations_holds with (m := m); eauto. + intros. destruct r; simpl in *. rewrite <- H. apply op_depends_on_memory_correct; auto. + congruence. +Qed. + +(** [add_store] returns a numbering that is satisfiable in the memory state + after execution of the corresponding [Istore] instruction. *) + +Lemma add_store_satisfiable: + forall n rs chunk addr args src a m m', + wf_numbering n -> + numbering_satisfiable ge sp rs m n -> + eval_addressing ge sp addr rs##args = Some a -> + Mem.storev chunk m a (rs#src) = Some m' -> + Val.has_type (rs#src) (type_of_chunk chunk) -> + numbering_satisfiable ge sp rs m' (add_store n chunk addr args src). +Proof. + intros. unfold add_store. destruct H0 as [valu A]. + destruct (valnum_regs n args) as [n1 vargs]_eqn. + exploit valnum_regs_holds; eauto. intros [valu' [B [C D]]]. + exploit wf_valnum_regs; eauto. intros [U [V W]]. + set (n2 := kill_equations (filter_after_store chunk addr vargs) n1). + assert (numbering_holds valu' ge sp rs m' n2). + apply kill_equations_holds with (m := m); auto. + intros. destruct r; simpl in *. + rewrite <- H0. apply op_depends_on_memory_correct; auto. + destruct H0 as [a' [P Q]]. + destruct (eq_list_valnum vargs l); simpl in H4; try congruence. subst l. + rewrite negb_false_iff in H4. + exists a'; split; auto. + destruct a; simpl in H2; try congruence. + destruct a'; simpl in Q; try congruence. + simpl. rewrite <- Q. + rewrite C in P. eapply Mem.load_store_other; eauto. + exploit addressing_separated_sound; eauto. intuition congruence. + assert (N2: numbering_satisfiable ge sp rs m' n2). + exists valu'; auto. + set (n3 := add_rhs n2 src (Load chunk addr vargs)). + assert (N3: Val.load_result chunk (rs#src) = rs#src -> numbering_satisfiable ge sp rs m' n3). + intro EQ. unfold n3. apply add_rhs_satisfiable_gen with valu' rs. + apply wf_kill_equations; auto. + red. auto. auto. + red. exists a; split. congruence. + rewrite <- EQ. destruct a; simpl in H2; try discriminate. simpl. + eapply Mem.load_store_same; eauto. + auto. + destruct chunk; auto; apply N3. + simpl in H3. destruct (rs#src); auto || contradiction. + simpl in H3. destruct (rs#src); auto || contradiction. Qed. (** Correctness of [reg_valnum]: if it returns a register [r], @@ -613,10 +669,10 @@ Qed. result value of the operation or memory load. *) Lemma find_rhs_correct: - forall valu rs n rh r, + forall valu rs m n rh r, numbering_holds valu ge sp rs m n -> find_rhs n rh = Some r -> - rhs_evals_to valu rh rs#r. + rhs_evals_to valu rh m rs#r. Proof. intros until r. intros NH. unfold find_rhs. @@ -630,7 +686,7 @@ Proof. Qed. Lemma find_op_correct: - forall rs n op args r, + forall rs m n op args r, wf_numbering n -> numbering_satisfiable ge sp rs m n -> find_op n op args = Some r -> @@ -638,15 +694,15 @@ Lemma find_op_correct: Proof. intros until r. intros WF [valu NH]. unfold find_op. caseEq (valnum_regs n args). intros n' vl VR FIND. - generalize (valnum_regs_holds _ _ _ _ _ _ WF NH VR). + generalize (valnum_regs_holds _ _ _ _ _ _ _ WF NH VR). intros [valu2 [NH2 [EQ AG]]]. rewrite <- EQ. - change (rhs_evals_to valu2 (Op op vl) rs#r). + change (rhs_evals_to valu2 (Op op vl) m rs#r). eapply find_rhs_correct; eauto. Qed. Lemma find_load_correct: - forall rs n chunk addr args r, + forall rs m n chunk addr args r, wf_numbering n -> numbering_satisfiable ge sp rs m n -> find_load n chunk addr args = Some r -> @@ -656,10 +712,10 @@ Lemma find_load_correct: Proof. intros until r. intros WF [valu NH]. unfold find_load. caseEq (valnum_regs n args). intros n' vl VR FIND. - generalize (valnum_regs_holds _ _ _ _ _ _ WF NH VR). + generalize (valnum_regs_holds _ _ _ _ _ _ _ WF NH VR). intros [valu2 [NH2 [EQ AG]]]. rewrite <- EQ. - change (rhs_evals_to valu2 (Load chunk addr vl) rs#r). + change (rhs_evals_to valu2 (Load chunk addr vl) m rs#r). eapply find_rhs_correct; eauto. Qed. @@ -672,38 +728,29 @@ End SATISFIABILITY. satisfiability at [pc']. *) Theorem analysis_correct_1: - forall ge sp rs m f pc pc' i, + forall ge sp rs m f approx pc pc' i, + analyze f = Some approx -> f.(fn_code)!pc = Some i -> In pc' (successors_instr i) -> - numbering_satisfiable ge sp rs m (transfer f pc (analyze f)!!pc) -> - numbering_satisfiable ge sp rs m (analyze f)!!pc'. -Proof. - intros until i. - generalize (wf_analyze f pc). - unfold analyze. - caseEq (Solver.fixpoint (successors f) (transfer f) (fn_entrypoint f)). - intros res FIXPOINT WF AT SUCC. - assert (Numbering.ge res!!pc' (transfer f pc res!!pc)). + numbering_satisfiable ge sp rs m (transfer f pc approx!!pc) -> + numbering_satisfiable ge sp rs m approx!!pc'. +Proof. + intros. + assert (Numbering.ge approx!!pc' (transfer f pc approx!!pc)). eapply Solver.fixpoint_solution; eauto. unfold successors_list, successors. rewrite PTree.gmap1. - rewrite AT. auto. - apply H. - intros. rewrite PMap.gi. apply empty_numbering_satisfiable. + rewrite H0. auto. + apply H3. auto. Qed. Theorem analysis_correct_entry: - forall ge sp rs m f, - numbering_satisfiable ge sp rs m (analyze f)!!(f.(fn_entrypoint)). + forall ge sp rs m f approx, + analyze f = Some approx -> + numbering_satisfiable ge sp rs m approx!!(f.(fn_entrypoint)). Proof. intros. - replace ((analyze f)!!(f.(fn_entrypoint))) - with empty_numbering. + replace (approx!!(f.(fn_entrypoint))) with Solver.L.top. apply empty_numbering_satisfiable. - unfold analyze. - caseEq (Solver.fixpoint (successors f) (transfer f) (fn_entrypoint f)). - intros res FIXPOINT. - symmetry. change empty_numbering with Solver.L.top. - eapply Solver.fixpoint_entry; eauto. - intros. symmetry. apply PMap.gi. + symmetry. eapply Solver.fixpoint_entry; eauto. Qed. (** * Semantic preservation *) @@ -711,40 +758,43 @@ Qed. Section PRESERVATION. Variable prog: program. -Let tprog := transf_program prog. +Variable tprog : program. +Hypothesis TRANSF: transf_program prog = OK tprog. Let ge := Genv.globalenv prog. Let tge := Genv.globalenv tprog. Lemma symbols_preserved: forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. -Proof (Genv.find_symbol_transf transf_fundef prog). +Proof (Genv.find_symbol_transf_partial transf_fundef prog TRANSF). Lemma varinfo_preserved: forall b, Genv.find_var_info tge b = Genv.find_var_info ge b. -Proof (Genv.find_var_info_transf transf_fundef prog). +Proof (Genv.find_var_info_transf_partial transf_fundef prog TRANSF). Lemma functions_translated: forall (v: val) (f: RTL.fundef), Genv.find_funct ge v = Some f -> - Genv.find_funct tge v = Some (transf_fundef f). -Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog). + exists tf, Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf. +Proof (Genv.find_funct_transf_partial transf_fundef prog TRANSF). Lemma funct_ptr_translated: forall (b: block) (f: RTL.fundef), Genv.find_funct_ptr ge b = Some f -> - Genv.find_funct_ptr tge b = Some (transf_fundef f). -Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog). + exists tf, Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf. +Proof (Genv.find_funct_ptr_transf_partial transf_fundef prog TRANSF). Lemma sig_preserved: - forall f, funsig (transf_fundef f) = funsig f. + forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f. Proof. - destruct f; reflexivity. + unfold transf_fundef; intros. destruct f; monadInv H; auto. + unfold transf_function in EQ. destruct (type_function f); try discriminate. + destruct (analyze f); try discriminate. inv EQ; auto. Qed. Lemma find_function_translated: forall ros rs f, find_function ge ros rs = Some f -> - find_function tge ros rs = Some (transf_fundef f). + exists tf, find_function tge ros rs = Some tf /\ transf_fundef f = OK tf. Proof. intros until f; destruct ros; simpl. intro. apply functions_translated; auto. @@ -753,6 +803,14 @@ Proof. discriminate. Qed. +Definition transf_function' (f: function) (approxs: PMap.t numbering) : function := + mkfunction + f.(fn_sig) + f.(fn_params) + f.(fn_stacksize) + (transf_code approxs f.(fn_code)) + f.(fn_entrypoint). + (** The proof of semantic preservation is a simulation argument using diagrams of the following form: << @@ -769,38 +827,52 @@ Qed. the numbering at [pc] (returned by the static analysis) is satisfiable. *) -Inductive match_stackframes: stackframe -> stackframe -> Prop := - match_stackframes_intro: - forall res sp pc rs f, - (forall v m, numbering_satisfiable ge sp (rs#res <- v) m (analyze f)!!pc) -> +Inductive match_stackframes: list stackframe -> list stackframe -> typ -> Prop := + | match_stackframes_nil: + match_stackframes nil nil Tint + | match_stackframes_cons: + forall res sp pc rs f approx tyenv s s' ty + (ANALYZE: analyze f = Some approx) + (WTF: wt_function f tyenv) + (WTREGS: wt_regset tyenv rs) + (TYRES: tyenv res = ty) + (SAT: forall v m, numbering_satisfiable ge sp (rs#res <- v) m approx!!pc) + (STACKS: match_stackframes s s' (proj_sig_res (fn_sig f))), match_stackframes - (Stackframe res f sp pc rs) - (Stackframe res (transf_function f) sp pc rs). + (Stackframe res f sp pc rs :: s) + (Stackframe res (transf_function' f approx) sp pc rs :: s') + ty. Inductive match_states: state -> state -> Prop := | match_states_intro: - forall s sp pc rs m s' f - (SAT: numbering_satisfiable ge sp rs m (analyze f)!!pc) - (STACKS: list_forall2 match_stackframes s s'), + forall s sp pc rs m s' f approx tyenv + (ANALYZE: analyze f = Some approx) + (WTF: wt_function f tyenv) + (WTREGS: wt_regset tyenv rs) + (SAT: numbering_satisfiable ge sp rs m approx!!pc) + (STACKS: match_stackframes s s' (proj_sig_res (fn_sig f))), match_states (State s f sp pc rs m) - (State s' (transf_function f) sp pc rs m) + (State s' (transf_function' f approx) sp pc rs m) | match_states_call: - forall s f args m s', - list_forall2 match_stackframes s s' -> + forall s f tf args m s', + match_stackframes s s' (proj_sig_res (funsig f)) -> + Val.has_type_list args (sig_args (funsig f)) -> + transf_fundef f = OK tf -> match_states (Callstate s f args m) - (Callstate s' (transf_fundef f) args m) + (Callstate s' tf args m) | match_states_return: - forall s s' v m, - list_forall2 match_stackframes s s' -> + forall s s' ty v m, + match_stackframes s s' ty -> + Val.has_type v ty -> match_states (Returnstate s v m) (Returnstate s' v m). Ltac TransfInstr := match goal with - | H1: (PTree.get ?pc ?c = Some ?instr), f: function |- _ => - cut ((transf_function f).(fn_code)!pc = Some(transf_instr (analyze f)!!pc instr)); + | H1: (PTree.get ?pc ?c = Some ?instr), f: function, approx: PMap.t numbering |- _ => + cut ((transf_function' f approx).(fn_code)!pc = Some(transf_instr approx!!pc instr)); [ simpl transf_instr - | unfold transf_function, transf_code; simpl; rewrite PTree.gmap; + | unfold transf_function', transf_code; simpl; rewrite PTree.gmap; unfold option_map; rewrite H1; reflexivity ] end. @@ -815,89 +887,105 @@ Proof. induction 1; intros; inv MS; try (TransfInstr; intro C). (* Inop *) - exists (State s' (transf_function f) sp pc' rs m); split. + exists (State s' (transf_function' f approx) sp pc' rs m); split. apply exec_Inop; auto. econstructor; eauto. eapply analysis_correct_1; eauto. simpl; auto. unfold transfer; rewrite H; auto. (* Iop *) - exists (State s' (transf_function f) sp pc' (rs#res <- v) m); split. + exists (State s' (transf_function' f approx) sp pc' (rs#res <- v) m); split. assert (eval_operation tge sp op rs##args m = Some v). rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved. - generalize C; clear C. - case (is_trivial_op op). - intro. eapply exec_Iop'; eauto. - caseEq (find_op (analyze f)!!pc op args). intros r FIND CODE. + destruct (is_trivial_op op) as []_eqn. + eapply exec_Iop'; eauto. + destruct (find_op approx!!pc op args) as [r|]_eqn. eapply exec_Iop'; eauto. simpl. assert (eval_operation ge sp op rs##args m = Some rs#r). eapply find_op_correct; eauto. eapply wf_analyze; eauto. congruence. - intros. eapply exec_Iop'; eauto. + eapply exec_Iop'; eauto. econstructor; eauto. + apply wt_regset_assign; auto. + generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. + simpl in H0. inv H0. rewrite <- H3. apply WTREGS. + replace (tyenv res) with (snd (type_of_operation op)). + eapply type_of_operation_sound; eauto. + rewrite <- H6. reflexivity. eapply analysis_correct_1; eauto. simpl; auto. unfold transfer; rewrite H. - eapply add_op_satisfiable; eauto. apply wf_analyze. + eapply add_op_satisfiable; eauto. eapply wf_analyze; eauto. (* Iload *) - exists (State s' (transf_function f) sp pc' (rs#dst <- v) m); split. + exists (State s' (transf_function' f approx) sp pc' (rs#dst <- v) m); split. assert (eval_addressing tge sp addr rs##args = Some a). rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. - generalize C; clear C. - caseEq (find_load (analyze f)!!pc chunk addr args). intros r FIND CODE. + destruct (find_load approx!!pc chunk addr args) as [r|]_eqn. eapply exec_Iop'; eauto. simpl. assert (exists a, eval_addressing ge sp addr rs##args = Some a /\ Mem.loadv chunk m a = Some rs#r). eapply find_load_correct; eauto. eapply wf_analyze; eauto. - elim H3; intros a' [A B]. + destruct H3 as [a' [A B]]. congruence. - intros. eapply exec_Iload'; eauto. + eapply exec_Iload'; eauto. econstructor; eauto. + generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. + apply wt_regset_assign. auto. rewrite H8. eapply type_of_chunk_correct; eauto. eapply analysis_correct_1; eauto. simpl; auto. unfold transfer; rewrite H. - eapply add_load_satisfiable; eauto. apply wf_analyze. + eapply add_load_satisfiable; eauto. eapply wf_analyze; eauto. (* Istore *) - exists (State s' (transf_function f) sp pc' rs m'); split. + exists (State s' (transf_function' f approx) sp pc' rs m'); split. assert (eval_addressing tge sp addr rs##args = Some a). rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. eapply exec_Istore; eauto. econstructor; eauto. eapply analysis_correct_1; eauto. simpl; auto. unfold transfer; rewrite H. - eapply kill_load_satisfiable; eauto. + eapply add_store_satisfiable; eauto. eapply wf_analyze; eauto. + generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. + rewrite <- H8. auto. (* Icall *) - exploit find_function_translated; eauto. intro FIND'. + exploit find_function_translated; eauto. intros [tf [FIND' TRANSF']]. econstructor; split. eapply exec_Icall; eauto. - apply sig_preserved. + apply sig_preserved; auto. + generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. econstructor; eauto. - constructor; auto. econstructor; eauto. intros. eapply analysis_correct_1; eauto. simpl; auto. unfold transfer; rewrite H. apply empty_numbering_satisfiable. + rewrite <- H7. apply wt_regset_list; auto. (* Itailcall *) - exploit find_function_translated; eauto. intro FIND'. + exploit find_function_translated; eauto. intros [tf [FIND' TRANSF']]. econstructor; split. eapply exec_Itailcall; eauto. - apply sig_preserved. - econstructor; eauto. + apply sig_preserved; auto. + generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. + econstructor; eauto. + replace (proj_sig_res (funsig fd)) with (proj_sig_res (fn_sig f)). auto. + unfold proj_sig_res. rewrite H6; auto. + rewrite <- H7. apply wt_regset_list; auto. (* Ibuiltin *) econstructor; split. eapply exec_Ibuiltin; eauto. eapply external_call_symbols_preserved; eauto. exact symbols_preserved. exact varinfo_preserved. + generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. econstructor; eauto. + apply wt_regset_assign; auto. + rewrite H6. eapply external_call_well_typed; eauto. eapply analysis_correct_1; eauto. simpl; auto. unfold transfer; rewrite H. - apply add_unknown_satisfiable. apply wf_kill_loads. apply wf_analyze. - eapply kill_load_satisfiable; eauto. + apply add_unknown_satisfiable. apply wf_kill_equations. eapply wf_analyze; eauto. + eapply kill_loads_satisfiable; eauto. (* Icond *) econstructor; split. @@ -916,26 +1004,36 @@ Proof. (* Ireturn *) econstructor; split. eapply exec_Ireturn; eauto. - constructor; auto. + econstructor; eauto. + generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI. + unfold proj_sig_res. rewrite <- H2. destruct or; simpl; auto. (* internal function *) - simpl. econstructor; split. + monadInv H7. unfold transf_function in EQ. + destruct (type_function f) as [tyenv|]_eqn; try discriminate. + destruct (analyze f) as [approx|]_eqn; inv EQ. + assert (WTF: wt_function f tyenv). apply type_function_correct; auto. + econstructor; split. eapply exec_function_internal; eauto. simpl. econstructor; eauto. - apply analysis_correct_entry. + apply wt_init_regs. inv WTF. rewrite wt_params; auto. + apply analysis_correct_entry; auto. (* external function *) - simpl. econstructor; split. + monadInv H7. + econstructor; split. eapply exec_function_external; eauto. eapply external_call_symbols_preserved; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto. + simpl. eapply external_call_well_typed; eauto. (* return *) - inv H3. inv H1. + inv H3. econstructor; split. eapply exec_return; eauto. - econstructor; eauto. + econstructor; eauto. + apply wt_regset_assign; auto. Qed. Lemma transf_initial_states: @@ -943,14 +1041,15 @@ Lemma transf_initial_states: exists st2, initial_state tprog st2 /\ match_states st1 st2. Proof. intros. inversion H. - exists (Callstate nil (transf_fundef f) nil m0); split. + exploit funct_ptr_translated; eauto. intros [tf [A B]]. + exists (Callstate nil tf nil m0); split. econstructor; eauto. - apply Genv.init_mem_transf; auto. - change (prog_main tprog) with (prog_main prog). + eapply Genv.init_mem_transf_partial; eauto. + replace (prog_main tprog) with (prog_main prog). rewrite symbols_preserved. eauto. - apply funct_ptr_translated; auto. - rewrite <- H3. apply sig_preserved. - constructor. constructor. + symmetry. eapply transform_partial_program_main; eauto. + rewrite <- H3. apply sig_preserved; auto. + constructor. rewrite H3. constructor. rewrite H3. constructor. auto. Qed. Lemma transf_final_states: diff --git a/driver/Compiler.v b/driver/Compiler.v index 6779aaf..be4f981 100644 --- a/driver/Compiler.v +++ b/driver/Compiler.v @@ -140,7 +140,7 @@ Definition transf_rtl_fundef (f: RTL.fundef) : res Asm.fundef := @@ print print_RTL_tailcall @@ Constprop.transf_fundef @@ print print_RTL_constprop - @@ CSE.transf_fundef + @@@ CSE.transf_fundef @@ print print_RTL_cse @@@ Allocation.transf_fundef @@ Tunneling.tunnel_fundef @@ -338,7 +338,7 @@ Proof. eapply compose_forward_simulation. apply Tailcallproof.transf_program_correct. eapply compose_forward_simulation. apply Constpropproof.transf_program_correct. - eapply compose_forward_simulation. apply CSEproof.transf_program_correct. + eapply compose_forward_simulation. apply CSEproof.transf_program_correct. eassumption. eapply compose_forward_simulation. apply Allocproof.transf_program_correct. eassumption. eapply compose_forward_simulation. apply Tunnelingproof.transf_program_correct. eapply compose_forward_simulation. apply Linearizeproof.transf_program_correct. eassumption. eauto. @@ -606,6 +606,61 @@ Proof. destruct c; simpl; try congruence. reflexivity. Qed. +(** Checking whether two addressings, applied to the same arguments, produce + separated memory addresses. Used in [CSE]. *) + +Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing) + (chunk2: memory_chunk) (addr2: addressing) : bool := + match addr1, addr2 with + | Aindexed ofs1, Aindexed ofs2 => + Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) + | Aglobal s1 ofs1, Aglobal s2 ofs2 => + if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true + | Abased s1 ofs1, Abased s2 ofs2 => + if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true + | Ainstack ofs1, Ainstack ofs2 => + Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) + | _, _ => false + end. + +Lemma addressing_separated_sound: + forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2, + addressing_separated chunk1 addr1 chunk2 addr2 = true -> + eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) -> + eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) -> + b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1. +Proof. + unfold addressing_separated; intros. + generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2. + destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv. +(* Aindexed *) + destruct v; simpl in *; inv H1; inv H2. + right. apply Int.no_overlap_sound; auto. +(* Aglobal *) + unfold symbol_address in *. + destruct (Genv.find_symbol ge i1) as []_eqn; inv H2. + destruct (Genv.find_symbol ge i) as []_eqn; inv H1. + destruct (ident_eq i i1). subst. + replace (Int.unsigned n1) with (Int.unsigned (Int.add Int.zero n1)). + replace (Int.unsigned n2) with (Int.unsigned (Int.add Int.zero n2)). + right. apply Int.no_overlap_sound; auto. + rewrite Int.add_commut; rewrite Int.add_zero; auto. + rewrite Int.add_commut; rewrite Int.add_zero; auto. + left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto. +(* Abased *) + unfold symbol_address in *. + destruct (Genv.find_symbol ge i1) as []_eqn; simpl in *; try discriminate. + destruct v; inv H2. + destruct (Genv.find_symbol ge i) as []_eqn; inv H1. + destruct (ident_eq i i1). subst. + rewrite (Int.add_commut i0 i3). rewrite (Int.add_commut i2 i3). + right. apply Int.no_overlap_sound; auto. + left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto. +(* Ainstack *) + destruct sp; simpl in *; inv H1; inv H2. + right. apply Int.no_overlap_sound; auto. +Qed. + (** * Invariance and compatibility properties. *) (** [eval_operation] and [eval_addressing] depend on a global environment diff --git a/lib/Integers.v b/lib/Integers.v index 9f58de3..1e58c2d 100644 --- a/lib/Integers.v +++ b/lib/Integers.v @@ -782,6 +782,30 @@ Proof. auto with ints. omega. Qed. +Theorem unsigned_add_carry: + forall x y, + unsigned (add x y) = unsigned x + unsigned y - unsigned (add_carry x y zero) * modulus. +Proof. + intros. + unfold add, add_carry. rewrite unsigned_zero. rewrite Zplus_0_r. simpl. + generalize (unsigned_range x) (unsigned_range y). intros. + destruct (zlt (unsigned x + unsigned y) modulus). + rewrite unsigned_zero. apply Zmod_unique with 0. omega. omega. + rewrite unsigned_one. apply Zmod_unique with 1. omega. omega. +Qed. + +Corollary unsigned_add_either: + forall x y, + unsigned (add x y) = unsigned x + unsigned y + \/ unsigned (add x y) = unsigned x + unsigned y - modulus. +Proof. + intros. rewrite unsigned_add_carry. unfold add_carry. + rewrite unsigned_zero. rewrite Zplus_0_r. + destruct (zlt (unsigned x + unsigned y) modulus). + rewrite unsigned_zero. left; omega. + rewrite unsigned_one. right; omega. +Qed. + (** ** Properties of negation *) Theorem neg_repr: forall z, neg (repr z) = repr (-z). @@ -3023,6 +3047,32 @@ Proof. generalize (unsigned_range x). omega. omega. Qed. +(** Non-overlapping test *) + +Definition no_overlap (ofs1: int) (sz1: Z) (ofs2: int) (sz2: Z) : bool := + let x1 := unsigned ofs1 in let x2 := unsigned ofs2 in + zlt (x1 + sz1) modulus && zlt (x2 + sz2) modulus + && (zle (x1 + sz1) x2 || zle (x2 + sz2) x1). + +Lemma no_overlap_sound: + forall ofs1 sz1 ofs2 sz2 base, + sz1 > 0 -> sz2 > 0 -> no_overlap ofs1 sz1 ofs2 sz2 = true -> + unsigned (add base ofs1) + sz1 <= unsigned (add base ofs2) + \/ unsigned (add base ofs2) + sz2 <= unsigned (add base ofs1). +Proof. + intros. + destruct (andb_prop _ _ H1). clear H1. + destruct (andb_prop _ _ H2). clear H2. + exploit proj_sumbool_true. eexact H1. intro A; clear H1. + exploit proj_sumbool_true. eexact H4. intro B; clear H4. + assert (unsigned ofs1 + sz1 <= unsigned ofs2 \/ unsigned ofs2 + sz2 <= unsigned ofs1). + destruct (orb_prop _ _ H3). left. eapply proj_sumbool_true; eauto. right. eapply proj_sumbool_true; eauto. + clear H3. + generalize (unsigned_range ofs1) (unsigned_range ofs2). intros P Q. + generalize (unsigned_add_either base ofs1) (unsigned_add_either base ofs2). + intros [C|C] [D|D]; omega. +Qed. + End Make. (** * Specialization to integers of size 8, 32, and 64 bits *) diff --git a/powerpc/Op.v b/powerpc/Op.v index 68b349e..d2b7ed5 100644 --- a/powerpc/Op.v +++ b/powerpc/Op.v @@ -541,6 +541,61 @@ Proof. destruct c; simpl; auto; discriminate. Qed. +(** Checking whether two addressings, applied to the same arguments, produce + separated memory addresses. Used in [CSE]. *) + +Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing) + (chunk2: memory_chunk) (addr2: addressing) : bool := + match addr1, addr2 with + | Aindexed ofs1, Aindexed ofs2 => + Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) + | Aglobal s1 ofs1, Aglobal s2 ofs2 => + if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true + | Abased s1 ofs1, Abased s2 ofs2 => + if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true + | Ainstack ofs1, Ainstack ofs2 => + Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) + | _, _ => false + end. + +Lemma addressing_separated_sound: + forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2, + addressing_separated chunk1 addr1 chunk2 addr2 = true -> + eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) -> + eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) -> + b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1. +Proof. + unfold addressing_separated; intros. + generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2. + destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv. +(* Aindexed *) + destruct v; simpl in *; inv H1; inv H2. + right. apply Int.no_overlap_sound; auto. +(* Aglobal *) + unfold symbol_address in *. + destruct (Genv.find_symbol ge i1) as []_eqn; inv H2. + destruct (Genv.find_symbol ge i) as []_eqn; inv H1. + destruct (ident_eq i i1). subst. + replace (Int.unsigned n1) with (Int.unsigned (Int.add Int.zero n1)). + replace (Int.unsigned n2) with (Int.unsigned (Int.add Int.zero n2)). + right. apply Int.no_overlap_sound; auto. + rewrite Int.add_commut; rewrite Int.add_zero; auto. + rewrite Int.add_commut; rewrite Int.add_zero; auto. + left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto. +(* Abased *) + unfold symbol_address in *. + destruct (Genv.find_symbol ge i1) as []_eqn; simpl in *; try discriminate. + destruct v; inv H2. + destruct (Genv.find_symbol ge i) as []_eqn; inv H1. + destruct (ident_eq i i1). subst. + rewrite (Int.add_commut i0 i3). rewrite (Int.add_commut i2 i3). + right. apply Int.no_overlap_sound; auto. + left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto. +(* Ainstack *) + destruct sp; simpl in *; inv H1; inv H2. + right. apply Int.no_overlap_sound; auto. +Qed. + (** * Invariance and compatibility properties. *) (** [eval_operation] and [eval_addressing] depend on a global environment |