9 files changed, 556 insertions, 231 deletions
diff --git a/.depend b/.depend
index 659bf22..bebd789 100644
--- a/.depend
+++ b/.depend
@@ -42,8 +42,8 @@ $(ARCH)/ConstpropOp.vo $(ARCH)/ConstpropOp.glob: $(ARCH)/ConstpropOp.v lib/Coqli
 backend/Constprop.vo backend/Constprop.glob: backend/Constprop.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo $(ARCH)/ConstpropOp.vo
 $(ARCH)/ConstpropOpproof.vo $(ARCH)/ConstpropOpproof.glob: $(ARCH)/ConstpropOpproof.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo
 backend/Constpropproof.vo backend/Constpropproof.glob: backend/Constpropproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Events.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo lib/Lattice.vo backend/Kildall.vo $(ARCH)/ConstpropOp.vo backend/Constprop.vo $(ARCH)/ConstpropOpproof.vo
-backend/CSE.vo backend/CSE.glob: backend/CSE.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo
-backend/CSEproof.vo backend/CSEproof.glob: backend/CSEproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo backend/CSE.vo
+backend/CSE.vo backend/CSE.glob: backend/CSE.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo
+backend/CSEproof.vo backend/CSEproof.glob: backend/CSEproof.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/RTLtyping.vo backend/Kildall.vo backend/CSE.vo
 $(ARCH)/Machregs.vo $(ARCH)/Machregs.glob: $(ARCH)/Machregs.v lib/Coqlib.vo lib/Maps.vo common/AST.vo
 backend/Locations.vo backend/Locations.glob: backend/Locations.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Values.vo $(ARCH)/Machregs.vo
 $(ARCH)/$(VARIANT)/Conventions1.vo $(ARCH)/$(VARIANT)/Conventions1.glob: $(ARCH)/$(VARIANT)/Conventions1.v lib/Coqlib.vo common/AST.vo backend/Locations.vo
@@ -86,9 +86,9 @@ backend/Stackingtyping.vo backend/Stackingtyping.glob: backend/Stackingtyping.v
 backend/Machsem.vo backend/Machsem.glob: backend/Machsem.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Mach.vo $(ARCH)/$(VARIANT)/Stacklayout.vo $(ARCH)/Asmgenretaddr.vo
 $(ARCH)/Asm.vo $(ARCH)/Asm.glob: $(ARCH)/Asm.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo backend/Locations.vo $(ARCH)/$(VARIANT)/Stacklayout.vo backend/Conventions.vo
 $(ARCH)/Asmgen.vo $(ARCH)/Asmgen.glob: $(ARCH)/Asmgen.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo
-$(ARCH)/Asmgenretaddr.vo $(ARCH)/Asmgenretaddr.glob: $(ARCH)/Asmgenretaddr.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo
-$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo lib/Maps.vo common/AST.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Machsem.vo backend/Machtyping.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo
-$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob: $(ARCH)/Asmgenproof.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Machsem.vo backend/Machtyping.vo backend/Conventions.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo $(ARCH)/Asmgenretaddr.vo $(ARCH)/Asmgenproof1.vo
+$(ARCH)/Asmgenretaddr.vo $(ARCH)/Asmgenretaddr.glob: $(ARCH)/Asmgenretaddr.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo
+$(ARCH)/Asmgenproof1.vo $(ARCH)/Asmgenproof1.glob: $(ARCH)/Asmgenproof1.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo $(ARCH)/Op.vo backend/Locations.vo backend/Mach.vo backend/Machsem.vo backend/Machtyping.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo backend/Conventions.vo
+$(ARCH)/Asmgenproof.vo $(ARCH)/Asmgenproof.glob: $(ARCH)/Asmgenproof.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Locations.vo backend/Conventions.vo backend/Mach.vo backend/Machsem.vo backend/Machtyping.vo $(ARCH)/Asm.vo $(ARCH)/Asmgen.vo $(ARCH)/Asmgenretaddr.vo $(ARCH)/Asmgenproof1.vo
 cfrontend/Csyntax.vo cfrontend/Csyntax.glob: cfrontend/Csyntax.v lib/Coqlib.vo common/Errors.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo
 cfrontend/Csem.vo cfrontend/Csem.glob: cfrontend/Csem.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo common/Memory.vo common/Events.vo common/Globalenvs.vo cfrontend/Csyntax.vo common/Smallstep.vo
 cfrontend/Cstrategy.vo cfrontend/Cstrategy.glob: cfrontend/Cstrategy.v lib/Axioms.vo lib/Coqlib.vo common/Errors.vo lib/Maps.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/AST.vo common/Memory.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo cfrontend/Csyntax.vo cfrontend/Csem.vo
diff --git a/Changelog b/Changelog
index 0f5ab97..963ffd1 100644
--- a/Changelog
+++ b/Changelog
@@ -20,6 +20,7 @@ Performance improvements:
 - Shorter, more efficient code generated for accessing volatile global
   variables.
 - Better code generated for && and || outside conditional tests.
+- More aggressive common subexpression elimination (CSE) of memory loads.
 - Improved register allocation for invocations of built-ins,
   especially for annotations.
 - In Cminor and down, make safe operators non-strict: they return Vundef
diff --git a/arm/Op.v b/arm/Op.v
index 905068f..99ba924 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -610,6 +610,37 @@ Proof.
   intros. destruct c; simpl; auto; congruence.
 Qed.
 
+(** Checking whether two addressings, applied to the same arguments, produce
+  separated memory addresses.  Used in [CSE]. *)
+
+Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing)
+                               (chunk2: memory_chunk) (addr2: addressing) : bool :=
+  match addr1, addr2 with
+  | Aindexed ofs1, Aindexed ofs2 => 
+      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
+  | Ainstack ofs1, Ainstack ofs2 =>
+      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
+  | _, _ => false
+  end.
+
+Lemma addressing_separated_sound:
+  forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2,
+  addressing_separated chunk1 addr1 chunk2 addr2 = true ->
+  eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) ->
+  eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) ->
+  b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1.
+Proof.
+  unfold addressing_separated; intros.
+  generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2.
+  destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv.
+(* Aindexed *)
+  destruct v; simpl in *; inv H1; inv H2.
+  right. apply Int.no_overlap_sound; auto. 
+(* Ainstack *)
+  destruct sp; simpl in *; inv H1; inv H2.
+  right. apply Int.no_overlap_sound; auto. 
+Qed.
+
 (** * Invariance and compatibility properties. *)
 
 (** [eval_operation] and [eval_addressing] depend on a global environment
diff --git a/backend/CSE.v b/backend/CSE.v
index 44ed590..ba8dec8 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -15,6 +15,7 @@
 
 Require Import Coqlib.
 Require Import Maps.
+Require Import Errors.
 Require Import AST.
 Require Import Integers.
 Require Import Floats.
@@ -24,6 +25,7 @@ Require Import Globalenvs.
 Require Import Op.
 Require Import Registers.
 Require Import RTL.
+Require Import RTLtyping.
 Require Import Kildall.
 
 (** * Value numbering *)
@@ -188,26 +190,57 @@ Definition add_unknown (n: numbering) (rd: reg) :=
               n.(num_eqs)
               (PTree.set rd n.(num_next) n.(num_reg)).
 
-(** [kill_load n] removes all equations involving memory loads,
-  as well as those involving memory-dependent operators.
-  It is used to reflect the effect of a memory store, which can
-  potentially invalidate all such equations. *)
+(** [kill_equations pred n] remove all equations satisfying predicate [pred]. *)
 
-Fixpoint kill_load_eqs (eqs: list (valnum * rhs)) : list (valnum * rhs) :=
+Fixpoint kill_eqs (pred: rhs -> bool) (eqs: list (valnum * rhs)) : list (valnum * rhs) :=
   match eqs with
   | nil => nil
-  | eq :: rem =>
-      match eq with
-      | (_, Op op _)  => if op_depends_on_memory op then kill_load_eqs rem else eq :: kill_load_eqs rem
-      | (_, Load _ _ _) => kill_load_eqs rem
-      end
+  | eq :: rem => if pred (snd eq) then kill_eqs pred rem else eq :: kill_eqs pred rem
   end.
 
-Definition kill_loads (n: numbering) : numbering :=
+Definition kill_equations (pred: rhs -> bool) (n: numbering) : numbering :=
   mknumbering n.(num_next)
-              (kill_load_eqs n.(num_eqs))
+              (kill_eqs pred n.(num_eqs))
               n.(num_reg).
 
+(** [kill_loads n] removes all equations involving memory loads,
+  as well as those involving memory-dependent operators.
+  It is used to reflect the effect of a builtin operation, which can
+  change memory in unpredictable ways and potentially invalidate all such equations. *)
+
+Definition filter_loads (r: rhs) : bool :=
+  match r with
+  | Op op _ => op_depends_on_memory op
+  | Load _ _ _ => true
+  end.
+
+Definition kill_loads (n: numbering) : numbering :=
+  kill_equations filter_loads n.
+
+(** [add_store n chunk addr rargs rsrc] updates the numbering [n] to reflect
+  the effect of a store instruction [Istore chunk addr rargs rsrc].
+  Equations involving the memory state are removed from [n], unless we
+  can prove that the store does not invalidate them.
+  Then, an equations [rsrc = Load chunk addr rargs] is added to reflect
+  the known content of the stored memory area, but only if [chunk] is
+  a "full-size" quantity ([Mint32] or [Mfloat64]). *)
+
+Definition filter_after_store (chunk: memory_chunk) (addr: addressing) (vl: list valnum) (r: rhs) : bool :=
+  match r with
+  | Op op vl' => op_depends_on_memory op
+  | Load chunk' addr' vl' =>
+      negb(eq_list_valnum vl vl' && addressing_separated chunk addr chunk' addr')
+  end.
+
+Definition add_store (n: numbering) (chunk: memory_chunk) (addr: addressing)
+                                   (rargs: list reg) (rsrc: reg) : numbering :=
+  let (n1, vargs) := valnum_regs n rargs in
+  let n2 := kill_equations (filter_after_store chunk addr vargs) n1 in
+  match chunk with
+  | Mint32 | Mfloat64 => add_rhs n2 rsrc (Load chunk addr vargs)
+  | _ => n2
+  end.
+
 (* [reg_valnum n vn] returns a register that is mapped to value number
    [vn], or [None] if no such register exists. *)
 
@@ -339,7 +372,7 @@ Definition transfer (f: function) (pc: node) (before: numbering) :=
       | Iload chunk addr args dst s =>
           add_load before dst chunk addr args
       | Istore chunk addr args src s =>
-          kill_loads before
+          add_store before chunk addr args src
       | Icall sig ros args res s =>
           empty_numbering
       | Itailcall sig ros args =>
@@ -358,16 +391,10 @@ Definition transfer (f: function) (pc: node) (before: numbering) :=
 (** The static analysis solves the dataflow inequations implied
   by the [transfer] function using the ``extended basic block'' solver,
   which produces sub-optimal solutions quickly.  The result is
-  a mapping from program points to numberings.  In the unlikely
-  case where the solver fails to find a solution, we simply associate
-  empty numberings to all program points, which is semantically correct
-  and effectively deactivates the CSE optimization. *)
-
-Definition analyze (f: RTL.function): PMap.t numbering :=
-  match Solver.fixpoint (successors f) (transfer f) f.(fn_entrypoint) with
-  | None => PMap.init empty_numbering
-  | Some res => res
-  end.
+  a mapping from program points to numberings. *)
+
+Definition analyze (f: RTL.function): option (PMap.t numbering) :=
+  Solver.fixpoint (successors f) (transfer f) f.(fn_entrypoint).
 
 (** * Code transformation *)
 
@@ -400,18 +427,25 @@ Definition transf_instr (n: numbering) (instr: instruction) :=
 Definition transf_code (approxs: PMap.t numbering) (instrs: code) : code :=
   PTree.map (fun pc instr => transf_instr approxs!!pc instr) instrs.
 
-Definition transf_function (f: function) : function :=
-  let approxs := analyze f in
-  mkfunction
-        f.(fn_sig)
-        f.(fn_params)
-        f.(fn_stacksize)
-        (transf_code approxs f.(fn_code))
-        f.(fn_entrypoint).
+Definition transf_function (f: function) : res function :=
+  match type_function f with
+  | Error msg => Error msg
+  | OK tyenv =>
+      match analyze f with
+      | None => Error (msg "CSE failure")
+      | Some approxs =>
+          OK(mkfunction
+               f.(fn_sig)
+               f.(fn_params)
+               f.(fn_stacksize)
+               (transf_code approxs f.(fn_code))
+               f.(fn_entrypoint))
+      end
+  end.
 
-Definition transf_fundef (f: fundef) : fundef :=
-  AST.transf_fundef transf_function f.
+Definition transf_fundef (f: fundef) : res fundef :=
+  AST.transf_partial_fundef transf_function f.
 
-Definition transf_program (p: program) : program :=
-  transform_program transf_fundef p.
+Definition transf_program (p: program) : res program :=
+  transform_partial_program transf_fundef p.
 
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index c685ef6..6543826 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -15,6 +15,7 @@
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
+Require Import Errors.
 Require Import Integers.
 Require Import Floats.
 Require Import Values.
@@ -25,6 +26,7 @@ Require Import Smallstep.
 Require Import Op.
 Require Import Registers.
 Require Import RTL.
+Require Import RTLtyping.
 Require Import Kildall.
 Require Import CSE.
 
@@ -203,25 +205,37 @@ Proof.
   apply Plt_trans_succ. eauto.
 Qed.
 
-Lemma kill_load_eqs_incl:
-  forall eqs, List.incl (kill_load_eqs eqs) eqs.
+Remark kill_eqs_in:
+  forall pred v rhs eqs,
+  In (v, rhs) (kill_eqs pred eqs) -> In (v, rhs) eqs /\ pred rhs = false.
 Proof.
-  induction eqs; simpl; intros.
-  apply incl_refl.
-  destruct a. destruct r. 
-  destruct (op_depends_on_memory o). auto with coqlib. 
-  apply incl_same_head; auto.
-  auto with coqlib.
+  induction eqs; simpl; intros. 
+  tauto.
+  destruct (pred (snd a)) as []_eqn. 
+  exploit IHeqs; eauto. tauto. 
+  simpl in H; destruct H. subst a. auto. exploit IHeqs; eauto. tauto. 
 Qed.
 
-Lemma wf_kill_loads:
-  forall n, wf_numbering n -> wf_numbering (kill_loads n).
+Lemma wf_kill_equations:
+  forall pred n, wf_numbering n -> wf_numbering (kill_equations pred n).
 Proof.
-  intros. inversion H. unfold kill_loads; split; simpl; intros.
-  apply H0. apply kill_load_eqs_incl. auto.
+  intros. inversion H. unfold kill_equations; split; simpl; intros.
+  exploit kill_eqs_in; eauto. intros [A B]. auto. 
   eauto.
 Qed.
 
+Lemma wf_add_store:
+  forall n chunk addr rargs rsrc,
+  wf_numbering n -> wf_numbering (add_store n chunk addr rargs rsrc).
+Proof.
+  intros. unfold add_store. 
+  destruct (valnum_regs n rargs) as [n1 vargs]_eqn.
+  exploit wf_valnum_regs; eauto. intros [A [B C]].
+  assert (wf_numbering (kill_equations (filter_after_store chunk addr vargs) n1)).
+    apply wf_kill_equations. auto.
+  destruct chunk; auto; apply wf_add_rhs; auto.
+Qed.
+
 Lemma wf_transfer:
   forall f pc n, wf_numbering n -> wf_numbering (transfer f pc n).
 Proof.
@@ -230,25 +244,22 @@ Proof.
   destruct i; auto.
   apply wf_add_op; auto.
   apply wf_add_load; auto.
-  apply wf_kill_loads; auto.
+  apply wf_add_store; auto.
   apply wf_empty_numbering.
   apply wf_empty_numbering.
-  apply wf_add_unknown. apply wf_kill_loads; auto.
+  apply wf_add_unknown. apply wf_kill_equations; auto.
 Qed.
 
 (** As a consequence, the numberings computed by the static analysis
   are well formed. *)
 
 Theorem wf_analyze:
-  forall f pc, wf_numbering (analyze f)!!pc.
+  forall f approx pc, analyze f = Some approx -> wf_numbering approx!!pc.
 Proof.
   unfold analyze; intros.
-  caseEq (Solver.fixpoint (successors f) (transfer f) (fn_entrypoint f)).
-  intros approx EQ.
   eapply Solver.fixpoint_invariant with (P := wf_numbering); eauto.
   exact wf_empty_numbering.   
   exact (wf_transfer f).
-  intro. rewrite PMap.gi. apply wf_empty_numbering.
 Qed.
 
 (** ** Properties of satisfiability of numberings *)
@@ -264,7 +275,6 @@ Section SATISFIABILITY.
 
 Variable ge: genv.
 Variable sp: val.
-Variable m: mem.
 
 (** Agremment between two mappings from value numbers to values
   up to a given value number. *)
@@ -303,7 +313,7 @@ Qed.
   extensional with respect to [valu_agree]. *)
 
 Lemma numbering_holds_exten:
-  forall valu1 valu2 n rs,
+  forall valu1 valu2 n rs m,
   valu_agree valu1 valu2 n.(num_next) ->
   wf_numbering n ->
   numbering_holds valu1 ge sp rs m n ->
@@ -328,7 +338,7 @@ Qed.
   value numbers. *)
 
 Lemma valnum_reg_holds:
-  forall valu1 rs n r n' v,
+  forall valu1 rs n r n' v m,
   wf_numbering n ->
   numbering_holds valu1 ge sp rs m n ->
   valnum_reg n r = (n', v) ->
@@ -351,22 +361,21 @@ Proof.
   assert (AG: valu_agree valu1 valu2 n.(num_next)).
     red; intros. unfold valu2. apply VMap.gso. 
     auto with coqlib.
-  elim (numbering_holds_exten _ _ _ _ AG H0 H1); intros.
+  destruct (numbering_holds_exten _ _ _ _ _ AG H0 H1) as [A B].
   exists valu2. 
   split. split; simpl; intros. auto. 
   unfold valu2, VMap.set, ValnumEq.eq. 
-  rewrite PTree.gsspec in H7. destruct (peq r0 r).
-  inversion H7. rewrite peq_true. congruence.
-  case (peq vn (num_next n)); intro. 
-  inversion H0. generalize (H9 _ _ H7).  rewrite e. intro.
-  elim (Plt_strict _ H10). 
-  auto.
-  split. unfold valu2. apply VMap.gss. 
+  rewrite PTree.gsspec in H5. destruct (peq r0 r).
+  inv H5. rewrite peq_true. auto.
+  rewrite peq_false. auto. 
+  assert (Plt vn (num_next n)). inv H0. eauto. 
+  red; intros; subst; eapply Plt_strict; eauto. 
+  split. unfold valu2. rewrite VMap.gss. auto. 
   auto.
 Qed.
 
 Lemma valnum_regs_holds:
-  forall rs rl valu1 n n' vl,
+  forall rs rl valu1 n n' vl m,
   wf_numbering n ->
   numbering_holds valu1 ge sp rs m n ->
   valnum_regs n rl = (n', vl) ->
@@ -378,15 +387,15 @@ Proof.
   induction rl; simpl; intros.
   (* base case *)
   inversion H1; subst n'; subst vl.
-  exists valu1. split. auto. split. reflexivity. apply valu_agree_refl.
+  exists valu1. split. auto. split. auto. apply valu_agree_refl.
   (* inductive case *)
   caseEq (valnum_reg n a); intros n1 v1 EQ1.
   caseEq (valnum_regs n1 rl); intros ns vs EQs.
   rewrite EQ1 in H1; rewrite EQs in H1. inversion H1. subst vl; subst n'.
-  generalize (valnum_reg_holds _ _ _ _ _ _ H H0 EQ1).
+  generalize (valnum_reg_holds _ _ _ _ _ _ _ H H0 EQ1).
   intros [valu2 [A [B C]]].
   generalize (wf_valnum_reg _ _ _ _ H EQ1). intros [D [E F]].
-  generalize (IHrl _ _ _ _ D A EQs). 
+  generalize (IHrl _ _ _ _ _ D A EQs). 
   intros [valu3 [P [Q R]]].
   exists valu3. 
   split. auto. 
@@ -398,7 +407,7 @@ Qed.
   of the value to which a right-hand side of an equation evaluates. *)
 
 Definition rhs_evals_to
-    (valu: valnum -> val) (rh: rhs) (v: val) : Prop :=
+    (valu: valnum -> val) (rh: rhs) (m: mem) (v: val) : Prop :=
   match rh with
   | Op op vl => 
       eval_operation ge sp op (List.map valu vl) m = Some v
@@ -409,23 +418,23 @@ Definition rhs_evals_to
   end.
 
 Lemma equation_evals_to_holds_1:
-  forall valu rh v vres,
-  rhs_evals_to valu rh v ->
+  forall valu rh v vres m,
+  rhs_evals_to valu rh m v ->
   equation_holds valu ge sp m vres rh ->
   valu vres = v.
 Proof.
-  intros until vres. unfold rhs_evals_to, equation_holds.
+  intros until m. unfold rhs_evals_to, equation_holds.
   destruct rh. congruence.
   intros [a1 [A1 B1]] [a2 [A2 B2]]. congruence.
 Qed.
 
 Lemma equation_evals_to_holds_2:
-  forall valu rh v vres,
+  forall valu rh v vres m,
   wf_rhs vres rh ->
-  rhs_evals_to valu rh v ->
+  rhs_evals_to valu rh m v ->
   equation_holds (VMap.set vres v valu) ge sp m vres rh.
 Proof.
-  intros until vres. unfold wf_rhs, rhs_evals_to, equation_holds.
+  intros until m. unfold wf_rhs, rhs_evals_to, equation_holds.
   rewrite VMap.gss. 
   assert (forall vl: list valnum,
             (forall v, In v vl -> Plt v vres) ->
@@ -438,13 +447,14 @@ Qed.
 (** The numbering obtained by adding an equation [rd = rhs] is satisfiable
   in a concrete register set where [rd] is set to the value of [rhs]. *)
 
-Lemma add_rhs_satisfiable:
-  forall n rh valu1 rs rd v,
+Lemma add_rhs_satisfiable_gen:
+  forall n rh valu1 rs rd rs' m,
   wf_numbering n ->
   wf_rhs n.(num_next) rh ->
   numbering_holds valu1 ge sp rs m n ->
-  rhs_evals_to valu1 rh v ->
-  numbering_satisfiable ge sp (rs#rd <- v) m (add_rhs n rd rh).
+  rhs_evals_to valu1 rh m (rs'#rd) ->
+  (forall r, r <> rd -> rs'#r = rs#r) ->
+  numbering_satisfiable ge sp rs' m (add_rhs n rd rh).
 Proof.
   intros. unfold add_rhs. 
   caseEq (find_valnum_rhs rh n.(num_eqs)).
@@ -452,34 +462,45 @@ Proof.
   intros vres FINDVN. inversion H1.
   exists valu1. split; simpl; intros.
   auto. 
-  rewrite Regmap.gsspec. 
-  rewrite PTree.gsspec in H5.
+  rewrite PTree.gsspec in H6.
   destruct (peq r rd).
+  inv H6. 
   symmetry. eapply equation_evals_to_holds_1; eauto.
-  apply H3. apply find_valnum_rhs_correct. congruence.
-  auto.
+  apply H4. apply find_valnum_rhs_correct. congruence.
+  rewrite H3; auto.
   (* RHS not found *)
   intro FINDVN. 
-  set (valu2 := VMap.set n.(num_next) v valu1).
+  set (valu2 := VMap.set n.(num_next) (rs'#rd) valu1).
   assert (AG: valu_agree valu1 valu2 n.(num_next)).
     red; intros. unfold valu2. apply VMap.gso. 
     auto with coqlib.
-  elim (numbering_holds_exten _ _ _ _ AG H H1); intros.
+  elim (numbering_holds_exten _ _ _ _ _ AG H H1); intros.
   exists valu2. split; simpl; intros.
-  elim H5; intro.
-  inversion H6; subst vn; subst rh0. 
-  unfold valu2. eapply equation_evals_to_holds_2; eauto.
-  auto.
-  rewrite Regmap.gsspec. rewrite PTree.gsspec in H5. destruct (peq r rd).
-  unfold valu2. inversion H5. symmetry. apply VMap.gss.
-  auto.
+  destruct H6. 
+  inv H6. unfold valu2. eapply equation_evals_to_holds_2; eauto. auto. 
+  rewrite PTree.gsspec in H6. destruct (peq r rd).
+  unfold valu2. inv H6. rewrite VMap.gss. auto.
+  rewrite H3; auto.
+Qed.
+
+Lemma add_rhs_satisfiable:
+  forall n rh valu1 rs rd v m,
+  wf_numbering n ->
+  wf_rhs n.(num_next) rh ->
+  numbering_holds valu1 ge sp rs m n ->
+  rhs_evals_to valu1 rh m v ->
+  numbering_satisfiable ge sp (rs#rd <- v) m (add_rhs n rd rh).
+Proof.
+  intros. eapply add_rhs_satisfiable_gen; eauto. 
+  rewrite Regmap.gss; auto.
+  intros. apply Regmap.gso; auto.
 Qed.
 
 (** [add_op] returns a numbering that is satisfiable in the register
   set after execution of the corresponding [Iop] instruction. *)
 
 Lemma add_op_satisfiable:
-  forall n rs op args dst v,
+  forall n rs op args dst v m,
   wf_numbering n ->
   numbering_satisfiable ge sp rs m n ->
   eval_operation ge sp op rs##args m = Some v ->
@@ -491,13 +512,13 @@ Proof.
   intros arg EQ. 
   destruct (is_move_operation_correct _ _ EQ) as [A B]. subst op args.
   caseEq (valnum_reg n arg). intros n1 v1 VL.
-  generalize (valnum_reg_holds _ _ _ _ _ _ H H2 VL). intros [valu2 [A [B C]]].
+  generalize (valnum_reg_holds _ _ _ _ _ _ _ H H2 VL). intros [valu2 [A [B C]]].
   generalize (wf_valnum_reg _ _ _ _ H VL). intros [D [E F]].  
   elim A; intros. exists valu2; split; simpl; intros.
   auto. rewrite Regmap.gsspec. rewrite PTree.gsspec in H5.
   destruct (peq r dst). simpl in H1. congruence. auto.
   intro NEQ. caseEq (valnum_regs n args). intros n1 vl VRL.
-  generalize (valnum_regs_holds _ _ _ _ _ _ H H2 VRL). intros [valu2 [A [B C]]].
+  generalize (valnum_regs_holds _ _ _ _ _ _ _ H H2 VRL). intros [valu2 [A [B C]]].
   generalize (wf_valnum_regs _ _ _ _ H VRL). intros [D [E F]].
   apply add_rhs_satisfiable with valu2; auto.
   simpl. congruence. 
@@ -507,19 +528,17 @@ Qed.
   set after execution of the corresponding [Iload] instruction. *)
 
 Lemma add_load_satisfiable:
-  forall n rs chunk addr args dst a v,
+  forall n rs chunk addr args dst a v m,
   wf_numbering n ->
   numbering_satisfiable ge sp rs m n ->
   eval_addressing ge sp addr rs##args = Some a ->
   Mem.loadv chunk m a = Some v ->
-  numbering_satisfiable ge sp 
-     (rs#dst <- v)
-     m (add_load n dst chunk addr args).
+  numbering_satisfiable ge sp (rs#dst <- v) m (add_load n dst chunk addr args).
 Proof.
   intros. inversion H0.
   unfold add_load.
   caseEq (valnum_regs n args). intros n1 vl VRL.
-  generalize (valnum_regs_holds _ _ _ _ _ _ H H3 VRL). intros [valu2 [A [B C]]].
+  generalize (valnum_regs_holds _ _ _ _ _ _ _ H H3 VRL). intros [valu2 [A [B C]]].
   generalize (wf_valnum_regs _ _ _ _ H VRL). intros [D [E F]].
   apply add_rhs_satisfiable with valu2; auto.
   simpl. exists a; split; congruence. 
@@ -529,11 +548,10 @@ Qed.
   register set after setting the target register to any value. *)
 
 Lemma add_unknown_satisfiable:
-  forall n rs dst v,
+  forall n rs dst v m,
   wf_numbering n ->
   numbering_satisfiable ge sp rs m n ->
-  numbering_satisfiable ge sp 
-     (rs#dst <- v) m (add_unknown n dst).
+  numbering_satisfiable ge sp (rs#dst <- v) m (add_unknown n dst).
 Proof.
   intros. destruct H0 as [valu A]. 
   set (valu' := VMap.set n.(num_next) v valu).
@@ -548,39 +566,77 @@ Proof.
   eauto.
 Qed.
 
-(** [kill_load] preserves satisfiability.  Moreover, the resulting numbering
-  is satisfiable in any concrete memory state. *)
+(** Satisfiability of [kill_equations]. *)
 
-Lemma kill_load_eqs_ops:
-  forall v rhs eqs,
-  In (v, rhs) (kill_load_eqs eqs) ->
-  match rhs with
-  | Op op _ => op_depends_on_memory op = false
-  | Load _ _ _ => False
-  end.
+Lemma kill_equations_holds:
+  forall pred valu n rs m m',
+  (forall v r,
+   equation_holds valu ge sp m v r -> pred r = false -> equation_holds valu ge sp m' v r) ->
+  numbering_holds valu ge sp rs m n ->
+  numbering_holds valu ge sp rs m' (kill_equations pred n).
 Proof.
-  induction eqs; simpl; intros.
-  elim H.
-  destruct a. destruct r. destruct (op_depends_on_memory o) as [] _eqn.
-  apply IHeqs; auto.
-  simpl in H; destruct H. inv H. auto. 
-  apply IHeqs. auto. 
-  apply IHeqs. auto.
+  intros. destruct H0 as [A B]. red; simpl. split; intros. 
+  exploit kill_eqs_in; eauto. intros [C D]. eauto. 
+  auto.
 Qed.
 
-Lemma kill_load_satisfiable:
-  forall n rs m',
+(** [kill_loads] preserves satisfiability.  Moreover, the resulting numbering
+  is satisfiable in any concrete memory state. *)
+
+Lemma kill_loads_satisfiable:
+  forall n rs m m',
   numbering_satisfiable ge sp rs m n ->
   numbering_satisfiable ge sp rs m' (kill_loads n).
 Proof.
-  intros. inv H. destruct H0. generalize (kill_load_eqs_incl n.(num_eqs)). intro.
-  exists x. split; intros.
-  generalize (H _ _ (H1 _ H2)). 
-  generalize (kill_load_eqs_ops _ _ _ H2).
-  destruct rh; simpl.
-  intros. rewrite <- H4. apply op_depends_on_memory_correct; auto.
-  tauto.
-  apply H0. auto. 
+  intros. destruct H as [valu A]. exists valu. eapply kill_equations_holds with (m := m); eauto.
+  intros. destruct r; simpl in *. rewrite <- H. apply op_depends_on_memory_correct; auto. 
+  congruence.
+Qed.
+
+(** [add_store] returns a numbering that is satisfiable in the memory state
+  after execution of the corresponding [Istore] instruction. *)
+
+Lemma add_store_satisfiable:
+  forall n rs chunk addr args src a m m',
+  wf_numbering n ->
+  numbering_satisfiable ge sp rs m n ->
+  eval_addressing ge sp addr rs##args = Some a ->
+  Mem.storev chunk m a (rs#src) = Some m' ->
+  Val.has_type (rs#src) (type_of_chunk chunk) ->
+  numbering_satisfiable ge sp rs m' (add_store n chunk addr args src).
+Proof.
+  intros. unfold add_store. destruct H0 as [valu A].
+  destruct (valnum_regs n args) as [n1 vargs]_eqn.
+  exploit valnum_regs_holds; eauto. intros [valu' [B [C D]]].
+  exploit wf_valnum_regs; eauto. intros [U [V W]].
+  set (n2 := kill_equations (filter_after_store chunk addr vargs) n1).
+  assert (numbering_holds valu' ge sp rs m' n2).
+    apply kill_equations_holds with (m := m); auto.
+    intros. destruct r; simpl in *. 
+    rewrite <- H0. apply op_depends_on_memory_correct; auto.
+    destruct H0 as [a' [P Q]]. 
+    destruct (eq_list_valnum vargs l); simpl in H4; try congruence. subst l.
+    rewrite negb_false_iff in H4.
+    exists a'; split; auto. 
+    destruct a; simpl in H2; try congruence.
+    destruct a'; simpl in Q; try congruence.
+    simpl. rewrite <- Q. 
+    rewrite C in P. eapply Mem.load_store_other; eauto.
+    exploit addressing_separated_sound; eauto. intuition congruence.
+  assert (N2: numbering_satisfiable ge sp rs m' n2).
+    exists valu'; auto.
+  set (n3 := add_rhs n2 src (Load chunk addr vargs)).
+  assert (N3: Val.load_result chunk (rs#src) = rs#src -> numbering_satisfiable ge sp rs m' n3).
+    intro EQ. unfold n3. apply add_rhs_satisfiable_gen with valu' rs.
+    apply wf_kill_equations; auto.
+    red. auto. auto.
+    red. exists a; split. congruence. 
+    rewrite <- EQ. destruct a; simpl in H2; try discriminate. simpl. 
+    eapply Mem.load_store_same; eauto. 
+    auto.
+  destruct chunk; auto; apply N3. 
+  simpl in H3. destruct (rs#src); auto || contradiction.
+  simpl in H3. destruct (rs#src); auto || contradiction.
 Qed.
 
 (** Correctness of [reg_valnum]: if it returns a register [r],
@@ -613,10 +669,10 @@ Qed.
   result value of the operation or memory load. *)
 
 Lemma find_rhs_correct:
-  forall valu rs n rh r,
+  forall valu rs m n rh r,
   numbering_holds valu ge sp rs m n ->
   find_rhs n rh = Some r ->
-  rhs_evals_to valu rh rs#r.
+  rhs_evals_to valu rh m rs#r.
 Proof.
   intros until r.  intros NH. 
   unfold find_rhs. 
@@ -630,7 +686,7 @@ Proof.
 Qed.
 
 Lemma find_op_correct:
-  forall rs n op args r,
+  forall rs m n op args r,
   wf_numbering n ->
   numbering_satisfiable ge sp rs m n ->
   find_op n op args = Some r ->
@@ -638,15 +694,15 @@ Lemma find_op_correct:
 Proof.
   intros until r. intros WF [valu NH].
   unfold find_op. caseEq (valnum_regs n args). intros n' vl VR FIND.
-  generalize (valnum_regs_holds _ _ _ _ _ _ WF NH VR).
+  generalize (valnum_regs_holds _ _ _ _ _ _ _ WF NH VR).
   intros [valu2 [NH2 [EQ AG]]].
   rewrite <- EQ. 
-  change (rhs_evals_to valu2 (Op op vl) rs#r).
+  change (rhs_evals_to valu2 (Op op vl) m rs#r).
   eapply find_rhs_correct; eauto.
 Qed.
 
 Lemma find_load_correct:
-  forall rs n chunk addr args r,
+  forall rs m n chunk addr args r,
   wf_numbering n ->
   numbering_satisfiable ge sp rs m n ->
   find_load n chunk addr args = Some r ->
@@ -656,10 +712,10 @@ Lemma find_load_correct:
 Proof.
   intros until r. intros WF [valu NH].
   unfold find_load. caseEq (valnum_regs n args). intros n' vl VR FIND.
-  generalize (valnum_regs_holds _ _ _ _ _ _ WF NH VR).
+  generalize (valnum_regs_holds _ _ _ _ _ _ _ WF NH VR).
   intros [valu2 [NH2 [EQ AG]]].
   rewrite <- EQ. 
-  change (rhs_evals_to valu2 (Load chunk addr vl) rs#r).
+  change (rhs_evals_to valu2 (Load chunk addr vl) m rs#r).
   eapply find_rhs_correct; eauto.
 Qed.
 
@@ -672,38 +728,29 @@ End SATISFIABILITY.
   satisfiability at [pc']. *)
 
 Theorem analysis_correct_1:
-  forall ge sp rs m f pc pc' i,
+  forall ge sp rs m f approx pc pc' i,
+  analyze f = Some approx ->
   f.(fn_code)!pc = Some i -> In pc' (successors_instr i) ->
-  numbering_satisfiable ge sp rs m (transfer f pc (analyze f)!!pc) ->
-  numbering_satisfiable ge sp rs m (analyze f)!!pc'.
-Proof.
-  intros until i.
-  generalize (wf_analyze f pc).
-  unfold analyze.
-  caseEq (Solver.fixpoint (successors f) (transfer f) (fn_entrypoint f)).
-  intros res FIXPOINT WF AT SUCC.
-  assert (Numbering.ge res!!pc' (transfer f pc res!!pc)).
+  numbering_satisfiable ge sp rs m (transfer f pc approx!!pc) ->
+  numbering_satisfiable ge sp rs m approx!!pc'.
+Proof.
+  intros.
+  assert (Numbering.ge approx!!pc' (transfer f pc approx!!pc)).
     eapply Solver.fixpoint_solution; eauto.
     unfold successors_list, successors. rewrite PTree.gmap1.
-    rewrite AT. auto.
-  apply H.
-  intros. rewrite PMap.gi. apply empty_numbering_satisfiable.
+    rewrite H0. auto.
+  apply H3. auto.
 Qed.
 
 Theorem analysis_correct_entry:
-  forall ge sp rs m f,
-  numbering_satisfiable ge sp rs m (analyze f)!!(f.(fn_entrypoint)).
+  forall ge sp rs m f approx,
+  analyze f = Some approx ->
+  numbering_satisfiable ge sp rs m approx!!(f.(fn_entrypoint)).
 Proof.
   intros. 
-  replace ((analyze f)!!(f.(fn_entrypoint)))
-          with empty_numbering.
+  replace (approx!!(f.(fn_entrypoint))) with Solver.L.top.
   apply empty_numbering_satisfiable.
-  unfold analyze.
-  caseEq (Solver.fixpoint (successors f) (transfer f) (fn_entrypoint f)).
-  intros res FIXPOINT. 
-  symmetry. change empty_numbering with Solver.L.top.
-  eapply Solver.fixpoint_entry; eauto.
-  intros. symmetry. apply PMap.gi. 
+  symmetry. eapply Solver.fixpoint_entry; eauto.
 Qed.
 
 (** * Semantic preservation *)
@@ -711,40 +758,43 @@ Qed.
 Section PRESERVATION.
 
 Variable prog: program.
-Let tprog := transf_program prog.
+Variable tprog : program.
+Hypothesis TRANSF: transf_program prog = OK tprog.
 Let ge := Genv.globalenv prog.
 Let tge := Genv.globalenv tprog.
 
 Lemma symbols_preserved:
   forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
-Proof (Genv.find_symbol_transf transf_fundef prog).
+Proof (Genv.find_symbol_transf_partial transf_fundef prog TRANSF).
 
 Lemma varinfo_preserved:
   forall b, Genv.find_var_info tge b = Genv.find_var_info ge b.
-Proof (Genv.find_var_info_transf transf_fundef prog).
+Proof (Genv.find_var_info_transf_partial transf_fundef prog TRANSF).
 
 Lemma functions_translated:
   forall (v: val) (f: RTL.fundef),
   Genv.find_funct ge v = Some f ->
-  Genv.find_funct tge v = Some (transf_fundef f).
-Proof (@Genv.find_funct_transf _ _ _ transf_fundef prog).
+  exists tf, Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_transf_partial transf_fundef prog TRANSF).
 
 Lemma funct_ptr_translated:
   forall (b: block) (f: RTL.fundef),
   Genv.find_funct_ptr ge b = Some f ->
-  Genv.find_funct_ptr tge b = Some (transf_fundef f).
-Proof (@Genv.find_funct_ptr_transf _ _ _ transf_fundef prog).
+  exists tf, Genv.find_funct_ptr tge b = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial transf_fundef prog TRANSF).
 
 Lemma sig_preserved:
-  forall f, funsig (transf_fundef f) = funsig f.
+  forall f tf, transf_fundef f = OK tf -> funsig tf = funsig f.
 Proof.
-  destruct f; reflexivity.
+  unfold transf_fundef; intros. destruct f; monadInv H; auto.
+  unfold transf_function in EQ. destruct (type_function f); try discriminate. 
+  destruct (analyze f); try discriminate. inv EQ; auto. 
 Qed.
 
 Lemma find_function_translated:
   forall ros rs f,
   find_function ge ros rs = Some f ->
-  find_function tge ros rs = Some (transf_fundef f).
+  exists tf, find_function tge ros rs = Some tf /\ transf_fundef f = OK tf.
 Proof.
   intros until f; destruct ros; simpl.
   intro. apply functions_translated; auto.
@@ -753,6 +803,14 @@ Proof.
   discriminate.
 Qed.
 
+Definition transf_function' (f: function) (approxs: PMap.t numbering) : function :=
+  mkfunction
+    f.(fn_sig)
+    f.(fn_params)
+    f.(fn_stacksize)
+    (transf_code approxs f.(fn_code))
+    f.(fn_entrypoint).
+
 (** The proof of semantic preservation is a simulation argument using
   diagrams of the following form:
 <<
@@ -769,38 +827,52 @@ Qed.
   the numbering at [pc] (returned by the static analysis) is satisfiable.
 *)
 
-Inductive match_stackframes: stackframe -> stackframe -> Prop :=
-  match_stackframes_intro:
-    forall res sp pc rs f,
-    (forall v m, numbering_satisfiable ge sp (rs#res <- v) m (analyze f)!!pc) ->
+Inductive match_stackframes: list stackframe -> list stackframe -> typ -> Prop :=
+  | match_stackframes_nil:
+      match_stackframes nil nil Tint
+  | match_stackframes_cons:
+      forall res sp pc rs f approx tyenv s s' ty
+           (ANALYZE: analyze f = Some approx)
+           (WTF: wt_function f tyenv)
+           (WTREGS: wt_regset tyenv rs)
+           (TYRES: tyenv res = ty)
+           (SAT: forall v m, numbering_satisfiable ge sp (rs#res <- v) m approx!!pc)
+           (STACKS: match_stackframes s s' (proj_sig_res (fn_sig f))),
     match_stackframes
-      (Stackframe res f sp pc rs)
-      (Stackframe res (transf_function f) sp pc rs).
+      (Stackframe res f sp pc rs :: s)
+      (Stackframe res (transf_function' f approx) sp pc rs :: s')
+      ty.
 
 Inductive match_states: state -> state -> Prop :=
   | match_states_intro:
-      forall s sp pc rs m s' f
-             (SAT: numbering_satisfiable ge sp rs m (analyze f)!!pc)
-             (STACKS: list_forall2 match_stackframes s s'),
+      forall s sp pc rs m s' f approx tyenv
+             (ANALYZE: analyze f = Some approx)
+             (WTF: wt_function f tyenv)
+             (WTREGS: wt_regset tyenv rs)
+             (SAT: numbering_satisfiable ge sp rs m approx!!pc)
+             (STACKS: match_stackframes s s' (proj_sig_res (fn_sig f))),
       match_states (State s f sp pc rs m)
-                   (State s' (transf_function f) sp pc rs m)
+                   (State s' (transf_function' f approx) sp pc rs m)
   | match_states_call:
-      forall s f args m s',
-      list_forall2 match_stackframes s s' ->
+      forall s f tf args m s',
+      match_stackframes s s' (proj_sig_res (funsig f)) ->
+      Val.has_type_list args (sig_args (funsig f)) ->
+      transf_fundef f = OK tf ->
       match_states (Callstate s f args m)
-                   (Callstate s' (transf_fundef f) args m)
+                   (Callstate s' tf args m)
   | match_states_return:
-      forall s s' v m,
-      list_forall2 match_stackframes s s' ->
+      forall s s' ty v m,
+      match_stackframes s s' ty ->
+      Val.has_type v ty ->
       match_states (Returnstate s v m)
                    (Returnstate s' v m).
 
 Ltac TransfInstr :=
   match goal with
-  | H1: (PTree.get ?pc ?c = Some ?instr), f: function |- _ =>
-      cut ((transf_function f).(fn_code)!pc = Some(transf_instr (analyze f)!!pc instr));
+  | H1: (PTree.get ?pc ?c = Some ?instr), f: function, approx: PMap.t numbering |- _ =>
+      cut ((transf_function' f approx).(fn_code)!pc = Some(transf_instr approx!!pc instr));
       [ simpl transf_instr
-      | unfold transf_function, transf_code; simpl; rewrite PTree.gmap; 
+      | unfold transf_function', transf_code; simpl; rewrite PTree.gmap; 
         unfold option_map; rewrite H1; reflexivity ]
   end.
 
@@ -815,89 +887,105 @@ Proof.
   induction 1; intros; inv MS; try (TransfInstr; intro C).
 
   (* Inop *)
-  exists (State s' (transf_function f) sp pc' rs m); split.
+  exists (State s' (transf_function' f approx) sp pc' rs m); split.
   apply exec_Inop; auto.
   econstructor; eauto. 
   eapply analysis_correct_1; eauto. simpl; auto. 
   unfold transfer; rewrite H; auto.
 
   (* Iop *)
-  exists (State s' (transf_function f) sp pc' (rs#res <- v) m); split.
+  exists (State s' (transf_function' f approx) sp pc' (rs#res <- v) m); split.
   assert (eval_operation tge sp op rs##args m = Some v).
     rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved.
-  generalize C; clear C.
-  case (is_trivial_op op).
-  intro. eapply exec_Iop'; eauto. 
-  caseEq (find_op (analyze f)!!pc op args). intros r FIND CODE.
+  destruct (is_trivial_op op) as []_eqn.
+  eapply exec_Iop'; eauto. 
+  destruct (find_op approx!!pc op args) as [r|]_eqn. 
   eapply exec_Iop'; eauto. simpl. 
   assert (eval_operation ge sp op rs##args m = Some rs#r).
     eapply find_op_correct; eauto.
     eapply wf_analyze; eauto.
   congruence.
-  intros. eapply exec_Iop'; eauto.
+  eapply exec_Iop'; eauto.
   econstructor; eauto.
+  apply wt_regset_assign; auto. 
+  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
+  simpl in H0. inv H0. rewrite <- H3. apply WTREGS.
+  replace (tyenv res) with (snd (type_of_operation op)).
+  eapply type_of_operation_sound; eauto.
+  rewrite <- H6. reflexivity.
   eapply analysis_correct_1; eauto. simpl; auto.
   unfold transfer; rewrite H. 
-  eapply add_op_satisfiable; eauto. apply wf_analyze.
+  eapply add_op_satisfiable; eauto. eapply wf_analyze; eauto.
 
   (* Iload *)
-  exists (State s' (transf_function f) sp pc' (rs#dst <- v) m); split.
+  exists (State s' (transf_function' f approx) sp pc' (rs#dst <- v) m); split.
   assert (eval_addressing tge sp addr rs##args = Some a).
     rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
-  generalize C; clear C.
-  caseEq (find_load (analyze f)!!pc chunk addr args). intros r FIND CODE.
+  destruct (find_load approx!!pc chunk addr args) as [r|]_eqn.
   eapply exec_Iop'; eauto. simpl. 
   assert (exists a, eval_addressing ge sp addr rs##args = Some a
                  /\ Mem.loadv chunk m a = Some rs#r).
     eapply find_load_correct; eauto.
     eapply wf_analyze; eauto.
-  elim H3; intros a' [A B].
+  destruct H3 as [a' [A B]].
   congruence.
-  intros. eapply exec_Iload'; eauto.
+  eapply exec_Iload'; eauto.
   econstructor; eauto.
+  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
+  apply wt_regset_assign. auto. rewrite H8. eapply type_of_chunk_correct; eauto.
   eapply analysis_correct_1; eauto. simpl; auto. 
   unfold transfer; rewrite H. 
-  eapply add_load_satisfiable; eauto. apply wf_analyze.
+  eapply add_load_satisfiable; eauto. eapply wf_analyze; eauto.
 
   (* Istore *)
-  exists (State s' (transf_function f) sp pc' rs m'); split.
+  exists (State s' (transf_function' f approx) sp pc' rs m'); split.
   assert (eval_addressing tge sp addr rs##args = Some a).
     rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
   eapply exec_Istore; eauto.
   econstructor; eauto.
   eapply analysis_correct_1; eauto. simpl; auto. 
   unfold transfer; rewrite H.
-  eapply kill_load_satisfiable; eauto. 
+  eapply add_store_satisfiable; eauto. eapply wf_analyze; eauto.
+  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
+  rewrite <- H8. auto.
 
   (* Icall *)
-  exploit find_function_translated; eauto. intro FIND'.
+  exploit find_function_translated; eauto. intros [tf [FIND' TRANSF']]. 
   econstructor; split.
   eapply exec_Icall; eauto.
-  apply sig_preserved. 
+  apply sig_preserved; auto.
+  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
   econstructor; eauto. 
-  constructor; auto. 
   econstructor; eauto. 
   intros. eapply analysis_correct_1; eauto. simpl; auto. 
   unfold transfer; rewrite H. 
   apply empty_numbering_satisfiable.
+  rewrite <- H7. apply wt_regset_list; auto. 
 
   (* Itailcall *)
-  exploit find_function_translated; eauto. intro FIND'.
+  exploit find_function_translated; eauto. intros [tf [FIND' TRANSF']]. 
   econstructor; split.
   eapply exec_Itailcall; eauto.
-  apply sig_preserved. 
-  econstructor; eauto.
+  apply sig_preserved; auto.
+  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
+  econstructor; eauto. 
+  replace (proj_sig_res (funsig fd)) with (proj_sig_res (fn_sig f)). auto. 
+  unfold proj_sig_res. rewrite H6; auto.
+  rewrite <- H7. apply wt_regset_list; auto.
 
   (* Ibuiltin *)
   econstructor; split.
   eapply exec_Ibuiltin; eauto. 
   eapply external_call_symbols_preserved; eauto.
   exact symbols_preserved. exact varinfo_preserved.
+  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
   econstructor; eauto.
+  apply wt_regset_assign; auto. 
+  rewrite H6. eapply external_call_well_typed; eauto. 
   eapply analysis_correct_1; eauto. simpl; auto.
   unfold transfer; rewrite H. 
-  apply add_unknown_satisfiable. apply wf_kill_loads. apply wf_analyze.
-  eapply kill_load_satisfiable; eauto. 
+  apply add_unknown_satisfiable. apply wf_kill_equations. eapply wf_analyze; eauto.
+  eapply kill_loads_satisfiable; eauto. 
 
   (* Icond *)
   econstructor; split.
@@ -916,26 +1004,36 @@ Proof.
   (* Ireturn *)
   econstructor; split.
   eapply exec_Ireturn; eauto.
-  constructor; auto.
+  econstructor; eauto.
+  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
+  unfold proj_sig_res. rewrite <- H2. destruct or; simpl; auto. 
 
   (* internal function *)
-  simpl. econstructor; split.
+  monadInv H7. unfold transf_function in EQ. 
+  destruct (type_function f) as [tyenv|]_eqn; try discriminate.
+  destruct (analyze f) as [approx|]_eqn; inv EQ. 
+  assert (WTF: wt_function f tyenv). apply type_function_correct; auto.
+  econstructor; split.
   eapply exec_function_internal; eauto. 
   simpl. econstructor; eauto.
-  apply analysis_correct_entry. 
+  apply wt_init_regs. inv WTF. rewrite wt_params; auto.
+  apply analysis_correct_entry; auto.
 
   (* external function *)
-  simpl. econstructor; split.
+  monadInv H7. 
+  econstructor; split.
   eapply exec_function_external; eauto.
   eapply external_call_symbols_preserved; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto.
+  simpl. eapply external_call_well_typed; eauto. 
 
   (* return *)
-  inv H3. inv H1.
+  inv H3.
   econstructor; split.
   eapply exec_return; eauto.
-  econstructor; eauto. 
+  econstructor; eauto.
+  apply wt_regset_assign; auto. 
 Qed.
 
 Lemma transf_initial_states:
@@ -943,14 +1041,15 @@ Lemma transf_initial_states:
   exists st2, initial_state tprog st2 /\ match_states st1 st2.
 Proof.
   intros. inversion H. 
-  exists (Callstate nil (transf_fundef f) nil m0); split.
+  exploit funct_ptr_translated; eauto. intros [tf [A B]].
+  exists (Callstate nil tf nil m0); split.
   econstructor; eauto.
-  apply Genv.init_mem_transf; auto.
-  change (prog_main tprog) with (prog_main prog).
+  eapply Genv.init_mem_transf_partial; eauto.
+  replace (prog_main tprog) with (prog_main prog).
   rewrite symbols_preserved. eauto.
-  apply funct_ptr_translated; auto.
-  rewrite <- H3. apply sig_preserved. 
-  constructor. constructor.
+  symmetry. eapply transform_partial_program_main; eauto.
+  rewrite <- H3. apply sig_preserved; auto.
+  constructor. rewrite H3. constructor. rewrite H3. constructor. auto.
 Qed.
 
 Lemma transf_final_states:
diff --git a/driver/Compiler.v b/driver/Compiler.v
index 6779aaf..be4f981 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -140,7 +140,7 @@ Definition transf_rtl_fundef (f: RTL.fundef) : res Asm.fundef :=
    @@ print print_RTL_tailcall
    @@ Constprop.transf_fundef
    @@ print print_RTL_constprop
-   @@ CSE.transf_fundef
+  @@@ CSE.transf_fundef
    @@ print print_RTL_cse
   @@@ Allocation.transf_fundef
    @@ Tunneling.tunnel_fundef
@@ -338,7 +338,7 @@ Proof.
 
   eapply compose_forward_simulation. apply Tailcallproof.transf_program_correct. 
   eapply compose_forward_simulation. apply Constpropproof.transf_program_correct. 
-  eapply compose_forward_simulation. apply CSEproof.transf_program_correct.
+  eapply compose_forward_simulation. apply CSEproof.transf_program_correct. eassumption.
   eapply compose_forward_simulation. apply Allocproof.transf_program_correct. eassumption.
   eapply compose_forward_simulation. apply Tunnelingproof.transf_program_correct.
   eapply compose_forward_simulation. apply Linearizeproof.transf_program_correct. eassumption. eauto. 
diff --git a/ia32/Op.v b/ia32/Op.v
index 6389567..896badf 100644
--- a/ia32/Op.v
+++ b/ia32/Op.v
@@ -606,6 +606,61 @@ Proof.
   destruct c; simpl; try congruence. reflexivity. 
 Qed.
 
+(** Checking whether two addressings, applied to the same arguments, produce
+  separated memory addresses.  Used in [CSE]. *)
+
+Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing)
+                               (chunk2: memory_chunk) (addr2: addressing) : bool :=
+  match addr1, addr2 with
+  | Aindexed ofs1, Aindexed ofs2 => 
+      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
+  | Aglobal s1 ofs1, Aglobal s2 ofs2 => 
+      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
+  | Abased s1 ofs1, Abased s2 ofs2 => 
+      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
+  | Ainstack ofs1, Ainstack ofs2 =>
+      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
+  | _, _ => false
+  end.
+
+Lemma addressing_separated_sound:
+  forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2,
+  addressing_separated chunk1 addr1 chunk2 addr2 = true ->
+  eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) ->
+  eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) ->
+  b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1.
+Proof.
+  unfold addressing_separated; intros.
+  generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2.
+  destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv.
+(* Aindexed *)
+  destruct v; simpl in *; inv H1; inv H2.
+  right. apply Int.no_overlap_sound; auto. 
+(* Aglobal *)
+  unfold symbol_address in *. 
+  destruct (Genv.find_symbol ge i1) as []_eqn; inv H2.
+  destruct (Genv.find_symbol ge i) as []_eqn; inv H1.
+  destruct (ident_eq i i1). subst.
+  replace (Int.unsigned n1) with (Int.unsigned (Int.add Int.zero n1)).
+  replace (Int.unsigned n2) with (Int.unsigned (Int.add Int.zero n2)).
+  right. apply Int.no_overlap_sound; auto. 
+  rewrite Int.add_commut; rewrite Int.add_zero; auto.
+  rewrite Int.add_commut; rewrite Int.add_zero; auto.
+  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
+(* Abased *)
+  unfold symbol_address in *. 
+  destruct (Genv.find_symbol ge i1) as []_eqn; simpl in *; try discriminate.
+  destruct v; inv H2.
+  destruct (Genv.find_symbol ge i) as []_eqn; inv H1.
+  destruct (ident_eq i i1). subst.
+  rewrite (Int.add_commut i0 i3). rewrite (Int.add_commut i2 i3).
+  right. apply Int.no_overlap_sound; auto. 
+  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
+(* Ainstack *)
+  destruct sp; simpl in *; inv H1; inv H2.
+  right. apply Int.no_overlap_sound; auto. 
+Qed.
+
 (** * Invariance and compatibility properties. *)
 
 (** [eval_operation] and [eval_addressing] depend on a global environment
diff --git a/lib/Integers.v b/lib/Integers.v
index 9f58de3..1e58c2d 100644
--- a/lib/Integers.v
+++ b/lib/Integers.v
@@ -782,6 +782,30 @@ Proof.
   auto with ints. omega.
 Qed.
 
+Theorem unsigned_add_carry:
+  forall x y,
+  unsigned (add x y) = unsigned x + unsigned y - unsigned (add_carry x y zero) * modulus.
+Proof.
+  intros.
+  unfold add, add_carry. rewrite unsigned_zero. rewrite Zplus_0_r. simpl. 
+  generalize (unsigned_range x) (unsigned_range y). intros. 
+  destruct (zlt (unsigned x + unsigned y) modulus). 
+  rewrite unsigned_zero. apply Zmod_unique with 0. omega. omega. 
+  rewrite unsigned_one. apply Zmod_unique with 1. omega. omega. 
+Qed. 
+
+Corollary unsigned_add_either:
+  forall x y,
+  unsigned (add x y) = unsigned x + unsigned y
+  \/ unsigned (add x y) = unsigned x + unsigned y - modulus.
+Proof.
+  intros. rewrite unsigned_add_carry. unfold add_carry. 
+  rewrite unsigned_zero. rewrite Zplus_0_r. 
+  destruct (zlt (unsigned x + unsigned y) modulus).
+  rewrite unsigned_zero. left; omega.
+  rewrite unsigned_one. right; omega.
+Qed.
+
 (** ** Properties of negation *)
 
 Theorem neg_repr: forall z, neg (repr z) = repr (-z).
@@ -3023,6 +3047,32 @@ Proof.
   generalize (unsigned_range x). omega. omega.
 Qed.
 
+(** Non-overlapping test *)
+
+Definition no_overlap (ofs1: int) (sz1: Z) (ofs2: int) (sz2: Z) : bool :=
+  let x1 := unsigned ofs1 in let x2 := unsigned ofs2 in
+     zlt (x1 + sz1) modulus && zlt (x2 + sz2) modulus
+  && (zle (x1 + sz1) x2 || zle (x2 + sz2) x1).
+
+Lemma no_overlap_sound:
+  forall ofs1 sz1 ofs2 sz2 base,
+  sz1 > 0 -> sz2 > 0 -> no_overlap ofs1 sz1 ofs2 sz2 = true ->
+  unsigned (add base ofs1) + sz1 <= unsigned (add base ofs2)
+  \/ unsigned (add base ofs2) + sz2 <= unsigned (add base ofs1).
+Proof.
+  intros. 
+  destruct (andb_prop _ _ H1). clear H1. 
+  destruct (andb_prop _ _ H2). clear H2.
+  exploit proj_sumbool_true. eexact H1. intro A; clear H1.
+  exploit proj_sumbool_true. eexact H4. intro B; clear H4.
+  assert (unsigned ofs1 + sz1 <= unsigned ofs2 \/ unsigned ofs2 + sz2 <= unsigned ofs1).
+  destruct (orb_prop _ _ H3). left. eapply proj_sumbool_true; eauto. right. eapply proj_sumbool_true; eauto.
+  clear H3.
+  generalize (unsigned_range ofs1) (unsigned_range ofs2). intros P Q.
+  generalize (unsigned_add_either base ofs1) (unsigned_add_either base ofs2).
+  intros [C|C] [D|D]; omega.
+Qed.
+
 End Make.
 
 (** * Specialization to integers of size 8, 32, and 64 bits *)
diff --git a/powerpc/Op.v b/powerpc/Op.v
index 68b349e..d2b7ed5 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -541,6 +541,61 @@ Proof.
   destruct c; simpl; auto; discriminate. 
 Qed.
 
+(** Checking whether two addressings, applied to the same arguments, produce
+  separated memory addresses.  Used in [CSE]. *)
+
+Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing)
+                               (chunk2: memory_chunk) (addr2: addressing) : bool :=
+  match addr1, addr2 with
+  | Aindexed ofs1, Aindexed ofs2 => 
+      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
+  | Aglobal s1 ofs1, Aglobal s2 ofs2 => 
+      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
+  | Abased s1 ofs1, Abased s2 ofs2 => 
+      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
+  | Ainstack ofs1, Ainstack ofs2 =>
+      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
+  | _, _ => false
+  end.
+
+Lemma addressing_separated_sound:
+  forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2,
+  addressing_separated chunk1 addr1 chunk2 addr2 = true ->
+  eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) ->
+  eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) ->
+  b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1.
+Proof.
+  unfold addressing_separated; intros.
+  generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2.
+  destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv.
+(* Aindexed *)
+  destruct v; simpl in *; inv H1; inv H2.
+  right. apply Int.no_overlap_sound; auto. 
+(* Aglobal *)
+  unfold symbol_address in *. 
+  destruct (Genv.find_symbol ge i1) as []_eqn; inv H2.
+  destruct (Genv.find_symbol ge i) as []_eqn; inv H1.
+  destruct (ident_eq i i1). subst.
+  replace (Int.unsigned n1) with (Int.unsigned (Int.add Int.zero n1)).
+  replace (Int.unsigned n2) with (Int.unsigned (Int.add Int.zero n2)).
+  right. apply Int.no_overlap_sound; auto. 
+  rewrite Int.add_commut; rewrite Int.add_zero; auto.
+  rewrite Int.add_commut; rewrite Int.add_zero; auto.
+  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
+(* Abased *)
+  unfold symbol_address in *. 
+  destruct (Genv.find_symbol ge i1) as []_eqn; simpl in *; try discriminate.
+  destruct v; inv H2.
+  destruct (Genv.find_symbol ge i) as []_eqn; inv H1.
+  destruct (ident_eq i i1). subst.
+  rewrite (Int.add_commut i0 i3). rewrite (Int.add_commut i2 i3).
+  right. apply Int.no_overlap_sound; auto. 
+  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
+(* Ainstack *)
+  destruct sp; simpl in *; inv H1; inv H2.
+  right. apply Int.no_overlap_sound; auto. 
+Qed.
+
 (** * Invariance and compatibility properties. *)
 
 (** [eval_operation] and [eval_addressing] depend on a global environment