From 56579f8ade21cb0a880ffbd6d5e28f152e951be8 Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Sun, 6 Apr 2014 07:11:12 +0000
Subject: Merge of branch linear-typing:

1) Revised division of labor between RTLtyping and Lineartyping:
- RTLtyping no longer keeps track of single-precision floats,
  switches from subtype-based inference to unification-based inference.
- Unityping: new library for unification-based inference.
- Locations: don't normalize at assignment in a stack slot
- Allocation, Allocproof: simplify accordingly.
- Lineartyping: add inference of locations that contain
  a single-precision float.
- Stackingproof: adapted accordingly.

This addresses a defect report whereas RTLtyping was rejecting code that used a RTL pseudoreg to hold both double- and single-precision floats (see test/regression/singlefloats.c).

This corresponds to commits 2435+2436 plus improvements in Lineartyping.

2) Add -dtimings option to measure compilation times.
Moved call to C parser from Elab to Parse, to make it easier to
measure parsing time independently of elaboration time.


git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2449 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 backend/Allocation.v    | 117 +++----
 backend/Allocproof.v    | 254 +++++++-------
 backend/Lineartyping.v  | 855 +++++++++++++++++++++++++++++++++++++++++++-----
 backend/Locations.v     |  24 +-
 backend/RTLtyping.v     | 428 ++++++++++--------------
 backend/Stackingproof.v | 207 +++++-------
 6 files changed, 1214 insertions(+), 671 deletions(-)

(limited to 'backend')

diff --git a/backend/Allocation.v b/backend/Allocation.v
index f830d79..f4fcd6e 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -368,7 +368,7 @@ End IndexedEqKind.
 
 Module OrderedEqKind := OrderedIndexed(IndexedEqKind).
 
-(** This is an order over equations that is lexicgraphic on [ereg], then
+(** This is an order over equations that is lexicographic on [ereg], then
   [eloc], then [ekind]. *)
 
 Module OrderedEquation <: OrderedType.
@@ -609,20 +609,6 @@ Definition subst_loc (l1 l2: loc) (e: eqs) : option eqs :=
      (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e))
      (Some e).
 
-(** [loc_type_compat env l e] checks that for all equations [r = l] in [e],
-  the type [env r] of [r] is compatible with the type of [l]. *)
-
-Definition sel_type (k: equation_kind) (ty: typ) : typ :=
-  match k with
-  | Full => ty
-  | Low | High => Tint
-  end.
-
-Definition loc_type_compat (env: regenv) (l: loc) (e: eqs) : bool :=
-  EqSet2.for_all_between
-    (fun q => subtype (sel_type (ekind q) (env (ereg q))) (Loc.type l))
-    (select_loc_l l) (select_loc_h l) (eqs2 e).
-
 (** [add_equations [r1...rN] [m1...mN] e] adds to [e] the [N] equations
     [ri = R mi [Full]].  Return [None] if the two lists have different lengths.
 *)
@@ -773,25 +759,18 @@ Definition destroyed_by_move (src dst: loc) :=
   | _, _ => destroyed_by_op Omove
   end.
 
-Definition well_typed_move (env: regenv) (dst: loc) (e: eqs) : bool :=
-  match dst with
-  | R r => true
-  | S sl ofs ty => loc_type_compat env dst e
-  end.
-
 (** Simulate the effect of a sequence of moves [mv] on a set of
   equations [e].  The set [e] is the equations that must hold
   after the sequence of moves.  Return the set of equations that
   must hold before the sequence of moves.  Return [None] if the
   set of equations [e] cannot hold after the sequence of moves. *)
 
-Fixpoint track_moves (env: regenv) (mv: moves) (e: eqs) : option eqs :=
+Fixpoint track_moves (mv: moves) (e: eqs) : option eqs :=
   match mv with
   | nil => Some e
   | (src, dst) :: mv =>
-      do e1 <- track_moves env mv e;
+      do e1 <- track_moves mv e;
       assertion (can_undef_except dst (destroyed_by_move src dst)) e1;
-      assertion (well_typed_move env dst e1);
       subst_loc dst src e1
   end.
 
@@ -824,89 +803,89 @@ Definition kind_second_word := if Archi.big_endian then Low else High.
   equations that must hold "before" these instructions, or [None] if
   impossible. *)
 
-Definition transfer_aux (f: RTL.function) (env: regenv) (shape: block_shape) (e: eqs) : option eqs :=
+Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option eqs :=
   match shape with
   | BSnop mv s =>
-      track_moves env mv e
+      track_moves mv e
   | BSmove src dst mv s =>
-      track_moves env mv (subst_reg dst src e)
+      track_moves mv (subst_reg dst src e)
   | BSmakelong src1 src2 dst mv s =>
       let e1 := subst_reg_kind dst High src1 Full e in
       let e2 := subst_reg_kind dst Low src2 Full e1 in
       assertion (reg_unconstrained dst e2);
-      track_moves env mv e2
+      track_moves mv e2
   | BSlowlong src dst mv s =>
       let e1 := subst_reg_kind dst Full src Low e in
       assertion (reg_unconstrained dst e1);
-      track_moves env mv e1
+      track_moves mv e1
   | BShighlong src dst mv s =>
       let e1 := subst_reg_kind dst Full src High e in
       assertion (reg_unconstrained dst e1);
-      track_moves env mv e1
+      track_moves mv e1
   | BSop op args res mv1 args' res' mv2 s =>
-      do e1 <- track_moves env mv2 e;
+      do e1 <- track_moves mv2 e;
       do e2 <- transfer_use_def args res args' res' (destroyed_by_op op) e1;
-      track_moves env mv1 e2
+      track_moves mv1 e2
   | BSopdead op args res mv s =>
       assertion (reg_unconstrained res e);
-      track_moves env mv e
+      track_moves mv e
   | BSload chunk addr args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves env mv2 e;
+      do e1 <- track_moves mv2 e;
       do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1;
-      track_moves env mv1 e2
+      track_moves mv1 e2
   | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s =>
-      do e1 <- track_moves env mv3 e;
+      do e1 <- track_moves mv3 e;
       let e2 := remove_equation (Eq kind_second_word dst (R dst2')) e1 in
       assertion (loc_unconstrained (R dst2') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr') e2);
       do e3 <- add_equations args args2' e2;
-      do e4 <- track_moves env mv2 e3;
+      do e4 <- track_moves mv2 e3;
       let e5 := remove_equation (Eq kind_first_word dst (R dst1')) e4 in
       assertion (loc_unconstrained (R dst1') e5);
       assertion (can_undef (destroyed_by_load Mint32 addr) e5);
       assertion (reg_unconstrained dst e5);
       do e6 <- add_equations args args1' e5;
-      track_moves env mv1 e6
+      track_moves mv1 e6
   | BSload2_1 addr args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves env mv2 e;
+      do e1 <- track_moves mv2 e;
       let e2 := remove_equation (Eq kind_first_word dst (R dst')) e1 in
       assertion (reg_loc_unconstrained dst (R dst') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr) e2);
       do e3 <- add_equations args args' e2;
-      track_moves env mv1 e3
+      track_moves mv1 e3
   | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves env mv2 e;
+      do e1 <- track_moves mv2 e;
       let e2 := remove_equation (Eq kind_second_word dst (R dst')) e1 in
       assertion (reg_loc_unconstrained dst (R dst') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr') e2);
       do e3 <- add_equations args args' e2;
-      track_moves env mv1 e3
+      track_moves mv1 e3
   | BSloaddead chunk addr args dst mv s =>
       assertion (reg_unconstrained dst e);
-      track_moves env mv e
+      track_moves mv e
   | BSstore chunk addr args src mv args' src' s =>
       assertion (can_undef (destroyed_by_store chunk addr) e);
       do e1 <- add_equations (src :: args) (src' :: args') e;
-      track_moves env mv e1
+      track_moves mv e1
   | BSstore2 addr addr' args src mv1 args1' src1' mv2 args2' src2' s =>
       assertion (can_undef (destroyed_by_store Mint32 addr') e);
       do e1 <- add_equations args args2' 
                   (add_equation (Eq kind_second_word src (R src2')) e);
-      do e2 <- track_moves env mv2 e1;
+      do e2 <- track_moves mv2 e1;
       assertion (can_undef (destroyed_by_store Mint32 addr) e2);
       do e3 <- add_equations args args1' 
                   (add_equation (Eq kind_first_word src (R src1')) e2);
-      track_moves env mv1 e3
+      track_moves mv1 e3
   | BScall sg ros args res mv1 ros' mv2 s =>
       let args' := loc_arguments sg in
       let res' := map R (loc_result sg) in
-      do e1 <- track_moves env mv2 e;
+      do e1 <- track_moves mv2 e;
       do e2 <- remove_equations_res res (sig_res sg) res' e1;
       assertion (forallb (fun l => reg_loc_unconstrained res l e2) res');
       assertion (no_caller_saves e2);
       do e3 <- add_equation_ros ros ros' e2;
       do e4 <- add_equations_args args (sig_args sg) args' e3;
-      track_moves env mv1 e4
+      track_moves mv1 e4
   | BStailcall sg ros args mv1 ros' =>
       let args' := loc_arguments sg in
       assertion (tailcall_is_possible sg);
@@ -914,9 +893,9 @@ Definition transfer_aux (f: RTL.function) (env: regenv) (shape: block_shape) (e:
       assertion (ros_compatible_tailcall ros');
       do e1 <- add_equation_ros ros ros' empty_eqs;
       do e2 <- add_equations_args args (sig_args sg) args' e1;
-      track_moves env mv1 e2
+      track_moves mv1 e2
   | BSbuiltin ef args res mv1 args' res' mv2 s =>
-      do e1 <- track_moves env mv2 e;
+      do e1 <- track_moves mv2 e;
       let args' := map R args' in
       let res' := map R res' in
       do e2 <- remove_equations_res res (sig_res (ef_sig ef)) res' e1;
@@ -924,23 +903,23 @@ Definition transfer_aux (f: RTL.function) (env: regenv) (shape: block_shape) (e:
       assertion (forallb (fun l => loc_unconstrained l e2) res');
       assertion (can_undef (destroyed_by_builtin ef) e2);
       do e3 <- add_equations_args args (sig_args (ef_sig ef)) args' e2;
-      track_moves env mv1 e3
+      track_moves mv1 e3
   | BSannot txt typ args res mv1 args' s =>
       do e1 <- add_equations_args args (annot_args_typ typ) args' e;
-      track_moves env mv1 e1
+      track_moves mv1 e1
   | BScond cond args mv args' s1 s2 =>
       assertion (can_undef (destroyed_by_cond cond) e);
       do e1 <- add_equations args args' e;
-      track_moves env mv e1
+      track_moves mv e1
   | BSjumptable arg mv arg' tbl =>
       assertion (can_undef destroyed_by_jumptable e);
-      track_moves env mv (add_equation (Eq Full arg (R arg')) e)
+      track_moves mv (add_equation (Eq Full arg (R arg')) e)
   | BSreturn None mv =>
-      track_moves env mv empty_eqs
+      track_moves mv empty_eqs
   | BSreturn (Some arg) mv =>
       let arg' := map R (loc_result (RTL.fn_sig f)) in
       do e1 <- add_equations_res arg (sig_res (RTL.fn_sig f)) arg' empty_eqs;
-      track_moves env mv e1
+      track_moves mv e1
   end.
 
 (** The main transfer function for the dataflow analysis.  Like [transfer_aux],
@@ -948,7 +927,7 @@ Definition transfer_aux (f: RTL.function) (env: regenv) (shape: block_shape) (e:
   equations that must hold "after".  It also handles error propagation
   and reporting. *)
 
-Definition transfer (f: RTL.function) (env: regenv) (shapes: PTree.t block_shape)
+Definition transfer (f: RTL.function) (shapes: PTree.t block_shape)
                     (pc: node) (after: res eqs) : res eqs :=
   match after with
   | Error _ => after
@@ -956,7 +935,7 @@ Definition transfer (f: RTL.function) (env: regenv) (shapes: PTree.t block_shape
       match shapes!pc with
       | None => Error(MSG "At PC " :: POS pc :: MSG ": unmatched block" :: nil)
       | Some shape =>
-          match transfer_aux f env shape e with
+          match transfer_aux f shape e with
           | None => Error(MSG "At PC " :: POS pc :: MSG ": invalid register allocation" :: nil)
           | Some e' => OK e'
           end
@@ -1104,8 +1083,8 @@ Definition successors_block_shape (bsh: block_shape) : list node :=
   | BSreturn optarg mv => nil
   end.
 
-Definition analyze (f: RTL.function) (env: regenv) (bsh: PTree.t block_shape) :=
-  DS.fixpoint_allnodes bsh successors_block_shape (transfer f env bsh).
+Definition analyze (f: RTL.function) (bsh: PTree.t block_shape) :=
+  DS.fixpoint_allnodes bsh successors_block_shape (transfer f bsh).
 
 (** * Validating and translating functions and programs *)
 
@@ -1139,9 +1118,9 @@ Function compat_entry (rparams: list reg) (tys: list typ) (lparams: list loc) (e
   and stack size. *)
 
 Definition check_entrypoints_aux (rtl: RTL.function) (ltl: LTL.function)
-                                 (env: regenv) (e1: eqs) : option unit :=
+                                 (e1: eqs) : option unit :=
   do mv <- pair_entrypoints rtl ltl;
-  do e2 <- track_moves env mv e1;
+  do e2 <- track_moves mv e1;
   assertion (compat_entry (RTL.fn_params rtl)
                           (sig_args (RTL.fn_sig rtl))
                           (loc_parameters (RTL.fn_sig rtl)) e2);
@@ -1154,10 +1133,10 @@ Local Close Scope option_monad_scope.
 Local Open Scope error_monad_scope.
 
 Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
-                             (env: regenv) (bsh: PTree.t block_shape)
+                             (bsh: PTree.t block_shape)
                              (a: PMap.t LEq.t): res unit :=
-  do e1 <- transfer rtl env bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
-  match check_entrypoints_aux rtl ltl env e1 with
+  do e1 <- transfer rtl bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
+  match check_entrypoints_aux rtl ltl e1 with
   | None => Error (msg "invalid register allocation at entry point")
   | Some _ => OK tt
   end.
@@ -1166,11 +1145,11 @@ Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
   a source RTL function and an LTL function generated by the external
   register allocator. *)
 
-Definition check_function (rtl: RTL.function) (ltl: LTL.function) (env: regenv) : res unit :=
+Definition check_function (rtl: RTL.function) (ltl: LTL.function) : res unit :=
   let bsh := pair_codes rtl ltl in
-  match analyze rtl env bsh with
+  match analyze rtl bsh with
   | None => Error (msg "allocation analysis diverges")
-  | Some a => check_entrypoints rtl ltl env bsh a
+  | Some a => check_entrypoints rtl ltl bsh a
   end.
 
 (** [regalloc] is the external register allocator.  It is written in OCaml
@@ -1186,7 +1165,7 @@ Definition transf_function (f: RTL.function) : res LTL.function :=
   | OK env =>
       match regalloc f with
       | Error m => Error m
-      | OK tf => do x <- check_function f tf env; OK tf
+      | OK tf => do x <- check_function f tf; OK tf
       end
   end.
 
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index 4c39fee..9303878 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -451,7 +451,7 @@ Lemma add_equations_args_lessdef:
   forall rs ls rl tyl ll e e',
   add_equations_args rl tyl ll e = Some e' ->
   satisf rs ls e' ->
-  Val.has_type_list (rs##rl) tyl ->
+  Val.has_type_list (rs##rl) (normalize_list tyl) ->
   Val.lessdef_list (rs##rl) (decode_longs tyl (map ls ll)).
 Proof.
   intros until e'. functional induction (add_equations_args rl tyl ll e); simpl; intros.
@@ -648,7 +648,7 @@ Lemma loc_unconstrained_satisf:
 Proof.
   intros; red; intros. 
   destruct (OrderedEquation.eq_dec q (Eq k r l)). 
-  subst q; simpl. unfold l; rewrite Locmap.gss_reg. auto.
+  subst q; simpl. unfold l; rewrite Locmap.gss. auto.
   assert (EqSet.In q (remove_equation (Eq k r l) e)).
     simpl. ESD.fsetdec.  
   rewrite Locmap.gso. apply H; auto. eapply loc_unconstrained_sound; eauto. 
@@ -674,7 +674,7 @@ Lemma parallel_assignment_satisf:
 Proof.
   intros; red; intros.
   destruct (OrderedEquation.eq_dec q (Eq k r l)).
-  subst q; simpl. unfold l; rewrite Regmap.gss; rewrite Locmap.gss_reg; auto.
+  subst q; simpl. unfold l; rewrite Regmap.gss; rewrite Locmap.gss; auto.
   assert (EqSet.In q (remove_equation {| ekind := k; ereg := r; eloc := l |} e)).
     simpl. ESD.fsetdec.
   exploit reg_loc_unconstrained_sound; eauto. intros [A B].
@@ -702,11 +702,11 @@ Proof.
   rewrite <- H5 in H2. simpl in H2. InvBooleans. simpl.
   destruct (OrderedEquation.eq_dec q (Eq Low res l2)).
   subst q; simpl. rewrite Regmap.gss.
-  destruct (ISREG l2) as [r2 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss_reg.
+  destruct (ISREG l2) as [r2 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss.
   apply Val.loword_lessdef; auto. 
   destruct (OrderedEquation.eq_dec q (Eq High res l1)).
   subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gso by auto.
-  destruct (ISREG l1) as [r1 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss_reg.
+  destruct (ISREG l1) as [r1 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss.
   apply Val.hiword_lessdef; auto.
   assert (EqSet.In q e'). unfold e', remove_equation; simpl; ESD.fsetdec. 
   rewrite Regmap.gso. rewrite ! Locmap.gso. auto. 
@@ -719,7 +719,7 @@ Proof.
   set (e' := remove_equation {| ekind := Full; ereg := res; eloc := l1 |} e) in *.
   destruct (OrderedEquation.eq_dec q (Eq Full res l1)). 
   subst q; simpl. rewrite Regmap.gss.
-  destruct (ISREG l1) as [r1 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss_reg.
+  destruct (ISREG l1) as [r1 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss.
   auto.
   assert (EqSet.In q e'). unfold e', remove_equation; simpl. ESD.fsetdec. 
   simpl. rewrite Regmap.gso. rewrite Locmap.gso. auto.
@@ -969,6 +969,7 @@ Proof.
   split. apply eqs_same; auto. auto. 
 Qed.
 
+(*
 Lemma loc_type_compat_charact:
   forall env l e q,
   loc_type_compat env l e = true ->
@@ -1008,7 +1009,8 @@ Proof.
   destruct (rs#r); exact I.
   eelim Loc.diff_not_eq. eexact A. auto.
 Qed.
-
+*)
+(*
 Remark val_lessdef_normalize:
   forall v v' ty,
   Val.has_type v ty -> Val.lessdef v v' ->
@@ -1016,23 +1018,19 @@ Remark val_lessdef_normalize:
 Proof.
   intros. inv H0. rewrite Val.load_result_same; auto. auto. 
 Qed.
+*)
 
 Lemma subst_loc_satisf:
   forall env src dst rs ls e e',
   subst_loc dst src e = Some e' ->
-  well_typed_move env dst e = true ->
+  (*well_typed_move env dst e = true ->*)
   wt_regset env rs ->
   satisf rs ls e' ->
   satisf rs (Locmap.set dst (ls src) ls) e.
 Proof.
   intros; red; intros.
   exploit in_subst_loc; eauto. intros [[A B] | [A B]].
-  subst dst. rewrite Locmap.gss.
-  destruct q as [k r l]; simpl in *.
-  exploit well_typed_move_charact; eauto.
-  destruct l as [mr | sl ofs ty]; intros.
-  apply (H2 _ B). 
-  apply val_lessdef_normalize; auto. apply (H2 _ B). 
+  subst dst. rewrite Locmap.gss. apply (H1 _ B).
   rewrite Locmap.gso; auto.
 Qed.
 
@@ -1079,22 +1077,18 @@ Proof.
 Qed.
 
 Lemma subst_loc_undef_satisf:
-  forall env src dst rs ls ml e e',
+  forall src dst rs ls ml e e',
   subst_loc dst src e = Some e' ->
-  well_typed_move env dst e = true ->
+  (*well_typed_move env dst e = true ->*)
   can_undef_except dst ml e = true ->
-  wt_regset env rs ->
+  (*wt_regset env rs ->*)
   satisf rs ls e' ->
   satisf rs (Locmap.set dst (ls src) (undef_regs ml ls)) e.
 Proof.
   intros; red; intros.
   exploit in_subst_loc; eauto. intros [[A B] | [A B]].
   subst dst. rewrite Locmap.gss.
-  destruct q as [k r l]; simpl in *.
-  exploit well_typed_move_charact; eauto.
-  destruct l as [mr | sl ofs ty]; intros.
-  apply (H3 _ B). 
-  apply val_lessdef_normalize; auto. apply (H3 _ B). 
+  destruct q as [k r l]; simpl in *. apply (H1 _ B). 
   rewrite Locmap.gso; auto. rewrite undef_regs_outside. eauto. 
   eapply can_undef_except_sound; eauto. apply Loc.diff_sym; auto.
 Qed.
@@ -1336,27 +1330,27 @@ Qed.
 (** * Properties of the dataflow analysis *)
 
 Lemma analyze_successors:
-  forall f env bsh an pc bs s e,
-  analyze f env bsh = Some an ->
+  forall f bsh an pc bs s e,
+  analyze f bsh = Some an ->
   bsh!pc = Some bs ->
   In s (successors_block_shape bs) ->
   an!!pc = OK e ->
-  exists e', transfer f env bsh s an!!s = OK e' /\ EqSet.Subset e' e.
+  exists e', transfer f bsh s an!!s = OK e' /\ EqSet.Subset e' e.
 Proof.
   unfold analyze; intros. exploit DS.fixpoint_allnodes_solution; eauto.
-  rewrite H2. unfold DS.L.ge. destruct (transfer f env bsh s an#s); intros.
+  rewrite H2. unfold DS.L.ge. destruct (transfer f bsh s an#s); intros.
   exists e0; auto. 
   contradiction.
 Qed.
 
 Lemma satisf_successors:
-  forall f env bsh an pc bs s e rs ls,
-  analyze f env bsh = Some an ->
+  forall f bsh an pc bs s e rs ls,
+  analyze f bsh = Some an ->
   bsh!pc = Some bs ->
   In s (successors_block_shape bs) ->
   an!!pc = OK e ->
   satisf rs ls e ->
-  exists e', transfer f env bsh s an!!s = OK e' /\ satisf rs ls e'.
+  exists e', transfer f bsh s an!!s = OK e' /\ satisf rs ls e'.
 Proof.
   intros. exploit analyze_successors; eauto. intros [e' [A B]]. 
   exists e'; split; auto. eapply satisf_incr; eauto.
@@ -1366,13 +1360,12 @@ Qed.
 
 Inductive transf_function_spec (f: RTL.function) (tf: LTL.function) : Prop :=
   | transf_function_spec_intro:  
-      forall env an mv k e1 e2,
-      wt_function f env ->
-      analyze f env (pair_codes f tf) = Some an ->
+      forall an mv k e1 e2,
+      analyze f (pair_codes f tf) = Some an ->
       (LTL.fn_code tf)!(LTL.fn_entrypoint tf) = Some(expand_moves mv (Lbranch (RTL.fn_entrypoint f) :: k)) ->
       wf_moves mv ->
-      transfer f env (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
-      track_moves env mv e1 = Some e2 ->
+      transfer f (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
+      track_moves mv e1 = Some e2 ->
       compat_entry (RTL.fn_params f) (sig_args (RTL.fn_sig f)) (loc_parameters (fn_sig tf)) e2 = true ->
       can_undef destroyed_at_function_entry e2 = true ->
       RTL.fn_stacksize f = LTL.fn_stacksize tf ->
@@ -1387,36 +1380,33 @@ Proof.
   unfold transf_function; intros.
   destruct (type_function f) as [env|] eqn:TY; try discriminate.
   destruct (regalloc f); try discriminate.
-  destruct (check_function f f0 env) as [] eqn:?; inv H.
+  destruct (check_function f f0) as [] eqn:?; inv H.
   unfold check_function in Heqr. 
-  destruct (analyze f env (pair_codes f tf)) as [an|] eqn:?; try discriminate.
+  destruct (analyze f (pair_codes f tf)) as [an|] eqn:?; try discriminate.
   monadInv Heqr. 
-  destruct (check_entrypoints_aux f tf env x) as [y|] eqn:?; try discriminate.
+  destruct (check_entrypoints_aux f tf x) as [y|] eqn:?; try discriminate.
   unfold check_entrypoints_aux, pair_entrypoints in Heqo0. MonadInv.
   exploit extract_moves_sound; eauto. intros [A B]. subst b.
   exploit check_succ_sound; eauto. intros [k EQ1]. subst b0.
-  econstructor; eauto. eapply type_function_correct; eauto. congruence. 
+  econstructor; eauto. congruence. 
 Qed.
 
 Lemma invert_code:
-  forall f env tf pc i opte e,
-  wt_function f env ->
+  forall f tf pc i opte e,
   (RTL.fn_code f)!pc = Some i ->
-  transfer f env (pair_codes f tf) pc opte = OK e ->
+  transfer f (pair_codes f tf) pc opte = OK e ->
   exists eafter, exists bsh, exists bb,
   opte = OK eafter /\ 
   (pair_codes f tf)!pc = Some bsh /\
   (LTL.fn_code tf)!pc = Some bb /\
   expand_block_shape bsh i bb /\
-  transfer_aux f env bsh eafter = Some e /\
-  wt_instr f env i.
+  transfer_aux f bsh eafter = Some e.
 Proof.
-  intros. destruct opte as [eafter|]; simpl in H1; try discriminate. exists eafter.
+  intros. destruct opte as [eafter|]; simpl in H0; try discriminate. exists eafter.
   destruct (pair_codes f tf)!pc as [bsh|] eqn:?; try discriminate. exists bsh. 
   exploit matching_instr_block; eauto. intros [bb [A B]].
-  destruct (transfer_aux f env bsh eafter) as [e1|] eqn:?; inv H1. 
-  exists bb. exploit wt_instr_at; eauto.
-  tauto. 
+  destruct (transfer_aux f bsh eafter) as [e1|] eqn:?; inv H0. 
+  exists bb. tauto. 
 Qed.
 
 (** * Semantic preservation *)
@@ -1490,11 +1480,10 @@ Proof.
 Qed.
 
 Lemma exec_moves:
-  forall mv env rs s f sp bb m e e' ls,
-  track_moves env mv e = Some e' ->
+  forall mv rs s f sp bb m e e' ls,
+  track_moves mv e = Some e' ->
   wf_moves mv ->
   satisf rs ls e' ->
-  wt_regset env rs ->
   exists ls',
     star step tge (Block s f sp (expand_moves mv bb) ls m)
                E0 (Block s f sp bb ls' m)
@@ -1506,7 +1495,7 @@ Opaque destroyed_by_op.
 - unfold expand_moves; simpl. inv H. exists ls; split. apply star_refl. auto.
   (* step *)
 - destruct a as [src dst]. unfold expand_moves. simpl. 
-  destruct (track_moves env mv e) as [e1|] eqn:?; MonadInv.
+  destruct (track_moves mv e) as [e1|] eqn:?; MonadInv.
   assert (wf_moves mv). red; intros. apply H0; auto with coqlib. 
   destruct src as [rsrc | ssrc]; destruct dst as [rdst | sdst].
   (* reg-reg *)
@@ -1532,17 +1521,13 @@ Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signa
       sg.(sig_res) = Some Tint ->
       match_stackframes nil nil sg
   | match_stackframes_cons:
-      forall res f sp pc rs s tf bb ls ts sg an e env
+      forall res f sp pc rs s tf bb ls ts sg an e
         (STACKS: match_stackframes s ts (fn_sig tf))
         (FUN: transf_function f = OK tf)
-        (ANL: analyze f env (pair_codes f tf) = Some an)
-        (EQ: transfer f env (pair_codes f tf) pc an!!pc = OK e)
-        (WTF: wt_function f env)
-        (WTRS: wt_regset env rs)
-        (WTRES: subtype (proj_sig_res sg) (env res) = true)
+        (ANL: analyze f (pair_codes f tf) = Some an)
+        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
         (STEPS: forall v ls1 m,
            Val.lessdef_list (encode_long (sig_res sg) v) (map ls1 (map R (loc_result sg))) ->
-           Val.has_type v (env res) ->
            agree_callee_save ls ls1 ->
            exists ls2,
            star LTL.step tge (Block ts tf sp bb ls1 m)
@@ -1555,15 +1540,13 @@ Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signa
 
 Inductive match_states: RTL.state -> LTL.state -> Prop :=
   | match_states_intro:
-      forall s f sp pc rs m ts tf ls m' an e env
+      forall s f sp pc rs m ts tf ls m' an e
         (STACKS: match_stackframes s ts (fn_sig tf))
         (FUN: transf_function f = OK tf)
-        (ANL: analyze f env (pair_codes f tf) = Some an)
-        (EQ: transfer f env (pair_codes f tf) pc an!!pc = OK e)
+        (ANL: analyze f (pair_codes f tf) = Some an)
+        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
         (SAT: satisf rs ls e)
-        (MEM: Mem.extends m m')
-        (WTF: wt_function f env)
-        (WTRS: wt_regset env rs),
+        (MEM: Mem.extends m m'),
       match_states (RTL.State s f sp pc rs m)
                    (LTL.State ts tf sp pc ls m')
   | match_states_call:
@@ -1572,8 +1555,7 @@ Inductive match_states: RTL.state -> LTL.state -> Prop :=
         (FUN: transf_fundef f = OK tf)
         (ARGS: Val.lessdef_list args (decode_longs (sig_args (funsig tf)) (map ls (loc_arguments (funsig tf)))))
         (AG: agree_callee_save (parent_locset ts) ls)
-        (MEM: Mem.extends m m')
-        (WTARGS: Val.has_type_list args (sig_args (funsig tf))),
+        (MEM: Mem.extends m m'),
       match_states (RTL.Callstate s f args m)
                    (LTL.Callstate ts tf ls m')
   | match_states_return:
@@ -1581,8 +1563,7 @@ Inductive match_states: RTL.state -> LTL.state -> Prop :=
         (STACKS: match_stackframes s ts sg)
         (RES: Val.lessdef_list (encode_long (sig_res sg) res) (map ls (map R (loc_result sg))))
         (AG: agree_callee_save (parent_locset ts) ls)
-        (MEM: Mem.extends m m')
-        (WTRES: Val.has_type res (proj_sig_res sg)),
+        (MEM: Mem.extends m m'),
       match_states (RTL.Returnstate s res m)
                    (LTL.Returnstate ts ls m').
 
@@ -1595,14 +1576,13 @@ Proof.
   intros. inv H. 
   constructor. congruence.
   econstructor; eauto.
-  unfold proj_sig_res in *. rewrite H0; auto. 
   intros. unfold loc_result in H; rewrite H0 in H; eauto. 
 Qed.
 
 Ltac UseShape :=
   match goal with
-  | [ WT: wt_function _ _, CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ _ = OK _ |- _ ] =>
-      destruct (invert_code _ _ _ _ _ _ _ WT CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR & WTI); 
+  | [ CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ = OK _ |- _ ] =>
+      destruct (invert_code _ _ _ _ _ _ CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR); 
       inv EBS; unfold transfer_aux in TR; MonadInv
   end.
 
@@ -1619,10 +1599,8 @@ Proof.
   { generalize args (type_of_addressing addr) H0 H1 H5.
     induction args0; simpl; intros.
     contradiction.
-    destruct l. discriminate. InvBooleans. 
-    destruct H2. subst a. 
-    assert (t = Tint) by auto with coqlib. subst t. 
-    destruct (env r); auto || discriminate. 
+    destruct l. discriminate. inv H4. 
+    destruct H2. subst a. apply H3; auto with coqlib. 
     eauto with coqlib. 
   }
   red; intros; subst r. rewrite H in H8; discriminate.
@@ -1632,11 +1610,11 @@ Qed.
     "plus" kind. *)
 
 Lemma step_simulation:
-  forall S1 t S2, RTL.step ge S1 t S2 ->
+  forall S1 t S2, RTL.step ge S1 t S2 -> wt_state S1 ->
   forall S1', match_states S1 S1' ->
   exists S2', plus LTL.step tge S1' t S2' /\ match_states S2 S2'.
 Proof.
-  induction 1; intros S1' MS; inv MS; try UseShape.
+  induction 1; intros WT S1' MS; inv MS; try UseShape.
 
 (* nop *)
   exploit exec_moves; eauto. intros [ls1 [X Y]]. 
@@ -1648,8 +1626,7 @@ Proof.
   econstructor; eauto. 
 
 (* op move *)
-- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
-  simpl in H0. inv H0.  
+- simpl in H0. inv H0.  
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1660,8 +1637,7 @@ Proof.
   econstructor; eauto.
 
 (* op makelong *)
-- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
-  simpl in H0. inv H0. 
+- simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1673,8 +1649,7 @@ Proof.
   econstructor; eauto.
 
 (* op lowlong *)
-- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
-  simpl in H0. inv H0. 
+- simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1686,8 +1661,7 @@ Proof.
   econstructor; eauto.
 
 (* op highlong *)
-- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
-  simpl in H0. inv H0. 
+- simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1699,8 +1673,7 @@ Proof.
   econstructor; eauto.
 
 (* op regular *)
-- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
-  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_operation_lessdef; eauto. intros [v' [F G]]. 
   exploit (exec_moves mv2); eauto. intros [ls2 [A2 B2]].
@@ -1724,11 +1697,9 @@ Proof.
   eapply reg_unconstrained_satisf; eauto. 
   intros [enext [U V]]. 
   econstructor; eauto.
-  eapply wt_exec_Iop; eauto. 
 
 (* load regular *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
-  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
   exploit Mem.loadv_extends; eauto. intros [v' [P Q]]. 
@@ -1744,8 +1715,7 @@ Proof.
   econstructor; eauto.
 
 (* load pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
-  exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1766,7 +1736,8 @@ Proof.
   assert (LD3: Val.lessdef_list rs##args (reglist ls3 args2')).
   { replace (rs##args) with ((rs#dst<-v)##args). 
     eapply add_equations_lessdef; eauto. 
-    apply list_map_exten; intros. rewrite Regmap.gso; auto. 
+    apply list_map_exten; intros. rewrite Regmap.gso; auto.
+    exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
     eapply addressing_not_long; eauto. 
   }
   exploit eval_addressing_lessdef. eexact LD3.
@@ -1798,8 +1769,7 @@ Proof.
   econstructor; eauto.
 
 (* load first word of a pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
-  exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1829,8 +1799,7 @@ Proof.
   econstructor; eauto.
 
 (* load second word of a pair *)
-- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
-  exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1870,7 +1839,6 @@ Proof.
   eapply reg_unconstrained_satisf; eauto. 
   intros [enext [U V]]. 
   econstructor; eauto.
-  eapply wt_exec_Iload; eauto.
 
 (* store *)
 - exploit exec_moves; eauto. intros [ls1 [X Y]].
@@ -1950,21 +1918,19 @@ Proof.
   exploit analyze_successors; eauto. simpl. left; eauto. intros [enext [U V]].
   econstructor; eauto.
   econstructor; eauto.
-  inv WTI. rewrite SIG. auto. 
   intros. exploit (exec_moves mv2). eauto. eauto. 
   eapply function_return_satisf with (v := v) (ls_before := ls1) (ls_after := ls0); eauto.
   eapply add_equation_ros_satisf; eauto. 
   eapply add_equations_args_satisf; eauto. 
   congruence.
-  apply wt_regset_assign; auto.
   intros [ls2 [A2 B2]].
   exists ls2; split. 
   eapply star_right. eexact A2. constructor. traceEq.
   apply satisf_incr with eafter; auto. 
   rewrite SIG. eapply add_equations_args_lessdef; eauto.
-  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
+  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
+  inv WTI. rewrite <- H7. apply wt_regset_list; auto. 
   simpl. red; auto.
-  inv WTI. rewrite SIG. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
 
 (* tailcall *)
 - set (sg := RTL.funsig fd) in *.
@@ -1985,16 +1951,16 @@ Proof.
   eapply match_stackframes_change_sig; eauto. rewrite SIG. rewrite e0. decEq.
   destruct (transf_function_inv _ _ FUN); auto.
   rewrite SIG. rewrite return_regs_arg_values; auto. eapply add_equations_args_lessdef; eauto.
-  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
+  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
+  inv WTI. rewrite <- H6. apply wt_regset_list; auto. 
   apply return_regs_agree_callee_save.
-  rewrite SIG. inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
 
 (* builtin *)
-- assert (WTRS': wt_regset env (rs#res <- v)) by (eapply wt_exec_Ibuiltin; eauto).
-  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit external_call_mem_extends; eauto.
   eapply add_equations_args_lessdef; eauto.
-  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
+  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
+  inv WTI. rewrite <- H4. apply wt_regset_list; auto. 
   intros [v' [m'' [F [G [J K]]]]].
   assert (E: map ls1 (map R args') = reglist ls1 args').
   { unfold reglist. rewrite list_map_compose. auto. }
@@ -2024,7 +1990,8 @@ Proof.
 (* annot *)
 - exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. 
   exploit external_call_mem_extends; eauto. eapply add_equations_args_lessdef; eauto.
-  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
+  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
+  inv WTI. simpl in H4. rewrite <- H4. apply wt_regset_list; auto. 
   intros [v' [m'' [F [G [J K]]]]].
   assert (v = Vundef). red in H0; inv H0. auto.
   econstructor; split.
@@ -2042,7 +2009,6 @@ Proof.
   econstructor; eauto.
   change (destroyed_by_builtin (EF_annot txt typ)) with (@nil mreg). 
   simpl. subst v. assumption.
-  apply wt_regset_assign; auto. subst v. constructor. 
 
 (* cond *)
 - exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
@@ -2074,45 +2040,43 @@ Proof.
 
 (* return *)
 - destruct (transf_function_inv _ _ FUN). 
-  exploit Mem.free_parallel_extends; eauto. rewrite H10. intros [m'' [P Q]].
-  inv WTI; MonadInv.
-  (* without an argument *)
-+ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
-  econstructor; split.
-  eapply plus_left. econstructor; eauto. 
-  eapply star_right. eexact A1.
-  econstructor. eauto. eauto. traceEq.
-  simpl. econstructor; eauto.
-  unfold encode_long, loc_result. rewrite <- H11; rewrite H13. simpl; auto.
-  apply return_regs_agree_callee_save.
-  constructor.
+  exploit Mem.free_parallel_extends; eauto. rewrite H9. intros [m'' [P Q]].
+  destruct or as [r|]; MonadInv.
   (* with an argument *)
 + exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
   eapply star_right. eexact A1.
   econstructor. eauto. eauto. traceEq.
-  simpl. econstructor; eauto. rewrite <- H11.
+  simpl. econstructor; eauto. rewrite <- H10.
   replace (map (return_regs (parent_locset ts) ls1) (map R (loc_result (RTL.fn_sig f))))
      with (map ls1 (map R (loc_result (RTL.fn_sig f)))).
   eapply add_equations_res_lessdef; eauto.
   rewrite !list_map_compose. apply list_map_exten; intros.
   unfold return_regs. apply pred_dec_true. eapply loc_result_caller_save; eauto.
   apply return_regs_agree_callee_save.
-  unfold proj_sig_res. rewrite <- H11; rewrite H13. 
-  eapply Val.has_subtype; eauto. 
+
+  (* without an argument *)
++ assert (SG: f.(RTL.fn_sig).(sig_res) = None).
+  { exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
+    inv WTI; auto. }
+  exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact A1.
+  econstructor. eauto. eauto. traceEq.
+  simpl. econstructor; eauto.
+  unfold encode_long, loc_result. rewrite <- H10. rewrite SG. simpl; auto.
+  apply return_regs_agree_callee_save.
 
 (* internal function *)
 - monadInv FUN. simpl in *.
   destruct (transf_function_inv _ _ EQ). 
-  exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H8; apply Zle_refl. 
+  exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H7; apply Zle_refl. 
   intros [m'' [U V]].
-  assert (WTRS: wt_regset env (init_regs args (fn_params f))).
-  { apply wt_init_regs. inv H0. eapply Val.has_subtype_list; eauto. congruence. }
   exploit (exec_moves mv). eauto. eauto.
     eapply can_undef_satisf; eauto. eapply compat_entry_satisf; eauto. 
-    rewrite call_regs_param_values. rewrite H9. eexact ARGS.
-    exact WTRS.
+    rewrite call_regs_param_values. rewrite H8. eexact ARGS.
   intros [ls1 [A B]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -2144,15 +2108,13 @@ Proof.
   exploit list_in_map_inv; eauto. intros [r [A B]]; subst l'. 
   destruct l; simpl; auto. red; intros; subst r0; elim H0.
   eapply loc_result_caller_save; eauto.
-  simpl. eapply external_call_well_typed; eauto. 
  
 (* return *)
 - inv STACKS.
-  exploit STEPS; eauto. eapply Val.has_subtype; eauto. intros [ls2 [A B]].
+  exploit STEPS; eauto. intros [ls2 [A B]].
   econstructor; split.
   eapply plus_left. constructor. eexact A. traceEq.
   econstructor; eauto.
-  apply wt_regset_assign; auto. eapply Val.has_subtype; eauto. 
 Qed.
 
 Lemma initial_states_simulation:
@@ -2173,7 +2135,6 @@ Proof.
   rewrite SIG; rewrite H3; simpl; auto.
   red; auto.
   apply Mem.extends_refl.
-  rewrite SIG; rewrite H3; simpl; auto.
 Qed.
 
 Lemma final_states_simulation:
@@ -2185,14 +2146,31 @@ Proof.
   unfold loc_result in RES; rewrite H in RES. simpl in RES. inv RES. inv H3; auto. 
 Qed.
 
+Lemma wt_prog: wt_program prog.
+Proof.
+  red; intros. exploit transform_partial_program_succeeds; eauto. 
+  intros [tfd TF]. destruct f; simpl in *. 
+- monadInv TF. unfold transf_function in EQ.
+  destruct (type_function f) as [env|] eqn:TF; try discriminate.
+  econstructor. eapply type_function_correct; eauto.
+- constructor.
+Qed.
+
 Theorem transf_program_correct:
   forward_simulation (RTL.semantics prog) (LTL.semantics tprog).
 Proof.
-  eapply forward_simulation_plus.
-  eexact symbols_preserved.
-  eexact initial_states_simulation.
-  eexact final_states_simulation.
-  exact step_simulation. 
+  set (ms := fun s s' => wt_state s /\ match_states s s').
+  eapply forward_simulation_plus with (match_states := ms). 
+- exact symbols_preserved.
+- intros. exploit initial_states_simulation; eauto. intros [st2 [A B]]. 
+  exists st2; split; auto. split; auto.
+  apply wt_initial_state with (p := prog); auto. exact wt_prog. 
+- intros. destruct H. eapply final_states_simulation; eauto. 
+- intros. destruct H0. 
+  exploit step_simulation; eauto. intros [s2' [A B]].
+  exists s2'; split. exact A. split.
+  eapply subject_reduction; eauto. eexact wt_prog. eexact H. 
+  auto.
 Qed.
 
 End PRESERVATION.
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index 73c5453..b08fe87 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -10,20 +10,26 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Typing rules for Linear. *)
+(** Type-checking Linear code. *)
 
+Require Import FSets.
+Require FSetAVL.
 Require Import Coqlib.
+Require Import Ordered.
+Require Import Maps.
+Require Import Iteration.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
 Require Import Events.
 Require Import Op.
+Require Import Machregs.
 Require Import Locations.
 Require Import Conventions.
 Require Import LTL.
 Require Import Linear.
 
-(** The typing rules for Linear enforce several properties useful for
+(** The typechecker for Linear enforce several properties useful for
   the proof of the [Stacking] pass:
 - for each instruction, the type of the result register or slot
   agrees with the type of values produced by the instruction;
@@ -31,13 +37,154 @@ Require Import Linear.
   accessed through [Lgetstack] and [Lsetstack] Linear instructions.
 *)
 
-(** The rules are presented as boolean-valued functions so that we
+(** * Tracking the flow of single-precision floats *)
+
+Module Locset := FSetAVL.Make(OrderedLoc).
+
+(** At each program point, we infer a set of locations that are
+  guaranteed to contain single-precision floats.  [None] means
+  that the program point is not reachable. *)
+
+Definition singlefloats := option Locset.t.
+
+Definition FSbot : singlefloats := None.
+Definition FStop : singlefloats := Some Locset.empty.
+
+Definition setreg (fs: singlefloats) (r: mreg) (t: typ) :=
+  match fs with
+  | None => None
+  | Some s =>
+      Some(if typ_eq t Tsingle then Locset.add (R r) s else Locset.remove (R r) s)
+  end.
+
+Fixpoint setregs (fs: singlefloats) (rl: list mreg) (tl: list typ) :=
+  match rl, tl with
+  | nil, nil => fs
+  | r1 :: rs, t1 :: ts => setregs (setreg fs r1 t1) rs ts
+  | _, _ => fs
+  end.
+
+Definition copyloc (fs: singlefloats) (dst src: loc) :=
+  match fs with
+  | None => None
+  | Some s =>
+      Some(if Locset.mem src s then Locset.add dst s else Locset.remove dst s)
+  end.
+
+Definition destroyed_at_call_regs :=
+  List.fold_right (fun r fs => Locset.add (R r) fs) Locset.empty destroyed_at_call.
+
+Definition callregs (fs: singlefloats) :=
+  match fs with
+  | None => None
+  | Some s => Some (Locset.diff s destroyed_at_call_regs)
+  end.
+
+(** The forward dataflow analysis below records [singlefloats] sets
+  at every label.  Sets at other program points are recomputed when
+  needed. *)
+
+Definition labelmap := PTree.t Locset.t.
+
+(** [update_label lbl fs lm] updates the label map [lm] to reflect the
+  fact that the [singlefloats] set [fs] can flow into label [lbl].
+  It returns the set after the label, an updated label map, and a
+  boolean indicating whether the label map changed. *)
+
+Definition update_label (lbl: label) (fs: singlefloats) (lm: labelmap) : 
+                                              singlefloats * labelmap * bool :=
+  match fs, PTree.get lbl lm with
+  | None, None => (None, lm, false)
+  | None, Some s => (Some s, lm, false)
+  | Some s, None => (Some s, PTree.set lbl s lm, true)
+  | Some s1, Some s2 =>
+      if Locset.subset s2 s1
+      then (Some s2, lm, false)
+      else let s := Locset.inter s1 s2 in (Some s, PTree.set lbl s lm, true)
+  end.
+
+(** [update_labels] is similar, for a list of labels (coming from a 
+    [Ljumptable] instruction). *)
+
+Fixpoint update_labels (lbls: list label) (fs: singlefloats) (lm: labelmap): labelmap * bool :=
+  match lbls with
+  | nil => (lm, false)
+  | lbl1 :: lbls =>
+      let '(_, lm1, changed1) := update_label lbl1 fs lm in
+      let '(lm2, changed2) := update_labels lbls fs lm1 in
+      (lm2, changed1 || changed2)
+  end.
+
+(** One pass of forward analysis over the code [c].  Returns an updated
+  label map and a boolean indicating whether the label map changed. *)
+
+Fixpoint ana_code (lm: labelmap) (ch: bool) (fs: singlefloats) (c: code) : labelmap * bool :=
+  match c with
+  | nil => (lm, ch)
+  | Lgetstack sl ofs ty rd :: c =>
+      ana_code lm ch (copyloc fs (R rd) (S sl ofs ty)) c
+  | Lsetstack rs sl ofs ty :: c =>
+      ana_code lm ch (copyloc fs (S sl ofs ty) (R rs)) c
+  | Lop op args dst :: c =>
+      match is_move_operation op args with
+      | Some src => ana_code lm ch (copyloc fs (R dst) (R src)) c
+      | None     => ana_code lm ch (setreg fs dst (snd (type_of_operation op))) c
+      end
+  | Lload chunk addr args dst :: c =>
+      ana_code lm ch (setreg fs dst (type_of_chunk chunk)) c
+  | Lstore chunk addr args src :: c =>
+      ana_code lm ch fs c
+  | Lcall sg ros :: c =>
+      ana_code lm ch (callregs fs) c
+  | Ltailcall sg ros :: c =>
+      ana_code lm ch None c
+  | Lbuiltin ef args res :: c =>
+      ana_code lm ch (setregs fs res (proj_sig_res' (ef_sig ef))) c
+  | Lannot ef args :: c =>
+      ana_code lm ch fs c
+  | Llabel lbl :: c =>
+      let '(fs1, lm1, ch1) := update_label lbl fs lm in
+      ana_code lm1 (ch || ch1) fs1 c
+  | Lgoto lbl :: c =>
+      let '(_, lm1, ch1) := update_label lbl fs lm in
+      ana_code lm1 (ch || ch1) None c
+  | Lcond cond args lbl :: c =>
+      let '(_, lm1, ch1) := update_label lbl fs lm in
+      ana_code lm1 (ch || ch1) fs c
+  | Ljumptable r lbls :: c =>
+      let '(lm1, ch1) := update_labels lbls fs lm in
+      ana_code lm1 (ch || ch1) None c
+  | Lreturn :: c =>
+      ana_code lm ch None c
+  end.
+
+(** Iterating [ana_code] until the label map is stable. *)
+
+Definition ana_iter (c: code) (lm: labelmap) : labelmap + labelmap :=
+  let '(lm1, ch) := ana_code lm false FStop c in
+  if ch then inr _ lm1 else inl _ lm.
+
+Definition ana_function (f: function): option labelmap :=
+  PrimIter.iterate _ _ (ana_iter f.(fn_code)) (PTree.empty _).
+
+(** * The type-checker *)
+
+(** The typing rules are presented as boolean-valued functions so that we
   get an executable type-checker for free. *)
 
 Section WT_INSTR.
 
 Variable funct: function.
 
+Variable lm: labelmap.
+
+Definition FSmem (l: loc) (fs: singlefloats) : bool :=
+  match fs with None => true | Some s => Locset.mem l s end.
+
+Definition loc_type (fs: singlefloats) (l: loc) :=
+  let ty := Loc.type l in
+  if typ_eq ty Tfloat && FSmem l fs then Tsingle else ty.
+
 Definition slot_valid (sl: slot) (ofs: Z) (ty: typ): bool :=
   match sl with
   | Local => zle 0 ofs
@@ -63,116 +210,672 @@ Definition loc_valid (l: loc) : bool :=
   | S _ _ _ => false
   end.
 
-Definition wt_instr (i: instruction) : bool :=
-  match i with
-  | Lgetstack sl ofs ty r =>
-      subtype ty (mreg_type r) && slot_valid sl ofs ty
-  | Lsetstack r sl ofs ty =>
-      slot_valid sl ofs ty && slot_writable sl
-  | Lop op args res =>
+Fixpoint wt_code (fs: singlefloats) (c: code) : bool :=
+  match c with
+  | nil => true
+  | Lgetstack sl ofs ty rd :: c =>
+      subtype ty (mreg_type rd) &&
+      slot_valid sl ofs ty &&
+      wt_code (copyloc fs (R rd) (S sl ofs ty)) c
+  | Lsetstack rs sl ofs ty :: c =>
+      subtype (loc_type fs (R rs)) ty &&
+      slot_valid sl ofs ty && slot_writable sl &&
+      wt_code (copyloc fs (S sl ofs ty) (R rs)) c
+  | Lop op args dst :: c =>
       match is_move_operation op args with
-      | Some arg =>
-          subtype (mreg_type arg) (mreg_type res)
-      | None => 
-          let (targs, tres) := type_of_operation op in
-          subtype tres (mreg_type res)
+      | Some src =>
+          typ_eq (mreg_type src) (mreg_type dst) &&
+          wt_code (copyloc fs (R dst) (R src)) c
+      | None =>
+          let (ty_args, ty_res) := type_of_operation op in
+          subtype ty_res (mreg_type dst) &&
+          wt_code (setreg fs dst ty_res) c
       end
-  | Lload chunk addr args dst =>
-      subtype (type_of_chunk chunk) (mreg_type dst)
-  | Ltailcall sg ros =>
-      zeq (size_arguments sg) 0
-  | Lbuiltin ef args res =>
-      subtype_list (proj_sig_res' (ef_sig ef)) (map mreg_type res) 
-  | Lannot ef args =>
-      forallb loc_valid args
-  | _ =>
-      true
+  | Lload chunk addr args dst :: c =>
+      subtype (type_of_chunk chunk) (mreg_type dst) &&
+      wt_code (setreg fs dst (type_of_chunk chunk)) c
+  | Lstore chunk addr args src :: c =>
+      wt_code fs c
+  | Lcall sg ros :: c =>
+      wt_code (callregs fs) c
+  | Ltailcall sg ros :: c =>
+      zeq (size_arguments sg) 0 &&
+      wt_code None c
+  | Lbuiltin ef args res :: c =>
+      let ty_res := proj_sig_res' (ef_sig ef) in
+      subtype_list ty_res (map mreg_type res) &&
+      wt_code (setregs fs res ty_res) c
+  | Lannot ef args :: c =>
+      forallb loc_valid args &&
+      wt_code fs c
+  | Llabel lbl :: c =>
+      wt_code lm!lbl c
+  | Lgoto lbl :: c =>
+      wt_code None c
+  | Lcond cond args lbl :: c =>
+      wt_code fs c
+  | Ljumptable r lbls :: c =>
+      wt_code None c
+  | Lreturn :: c =>
+      wt_code None c
   end.
 
 End WT_INSTR.
 
-Definition wt_code (f: function) (c: code) : bool :=
-  forallb (wt_instr f) c.
+Definition wt_funcode (f: function) (lm: labelmap) : bool :=
+  wt_code f lm FStop f.(fn_code).
 
 Definition wt_function (f: function) : bool :=
-  wt_code f f.(fn_code).
+  match ana_function f with
+  | None => false
+  | Some lm => wt_funcode f lm
+  end.
+
+(** * Properties of the static analysis *)
+
+Inductive FSincl: singlefloats -> singlefloats -> Prop :=
+  | FSincl_none: forall fs,
+      FSincl None fs
+  | FSincl_subset: forall s1 s2,
+      Locset.Subset s2 s1 -> FSincl (Some s1) (Some s2).
+
+Lemma update_label_false:
+  forall lbl fs lm fs' lm',
+  update_label lbl fs lm = (fs', lm', false) ->
+  FSincl fs lm!lbl /\ fs' = lm!lbl /\ lm' = lm.
+Proof.
+  unfold update_label; intros. 
+  destruct fs as [s1|]; destruct lm!lbl as [s2|].
+- destruct (Locset.subset s2 s1) eqn:S; inv H.
+  intuition. constructor. apply Locset.subset_2; auto.
+- inv H.
+- inv H. intuition. constructor. 
+- inv H. intuition. constructor.
+Qed.
 
-(** Typing the run-time state.  These definitions are used in [Stackingproof]. *)
+Lemma update_labels_false:
+  forall fs lbls lm lm',
+  update_labels lbls fs lm = (lm', false) ->
+  (forall lbl, In lbl lbls -> FSincl fs lm!lbl) /\ lm' = lm.
+Proof.
+  induction lbls; simpl; intros. 
+- inv H. tauto. 
+- destruct (update_label a fs lm) as [[fs1 lm1] changed1] eqn:UL.
+  destruct (update_labels lbls fs lm1) as [lm2 changed2] eqn:ULS.
+  inv H. apply orb_false_iff in H2. destruct H2; subst.
+  exploit update_label_false; eauto. intros (A & B & C).
+  exploit IHlbls; eauto. intros (D & E). subst.
+  split. intros. destruct H. congruence. auto.
+  auto.
+Qed.
+
+Lemma ana_code_false:
+  forall lm' c lm ch fs, ana_code lm ch fs c = (lm', false) -> ch = false /\ lm' = lm.
+Proof.
+  induction c; simpl; intros.
+- inv H; auto. 
+- destruct a; try (eapply IHc; eauto; fail).
+  + destruct (is_move_operation o l); eapply IHc; eauto.
+  + destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL.
+    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
+    exploit update_label_false; eauto. intros (C & D & E).
+    auto.
+  + destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL.
+    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
+    exploit update_label_false; eauto. intros (C & D & E). 
+    auto.
+  + destruct (update_label l0 fs lm) as [[fs1 lm1] ch1] eqn:UL.
+    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
+    exploit update_label_false; eauto. intros (C & D & E). 
+    auto.
+  + destruct (update_labels l fs lm) as [lm1 ch1] eqn:UL.
+    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
+    exploit update_labels_false; eauto. intros (C & D); subst.
+    auto.
+Qed.
+
+Lemma ana_function_inv:
+  forall f lm,
+  ana_function f = Some lm -> ana_code lm false FStop f.(fn_code) = (lm, false).
+Proof.
+  intros. unfold ana_function in H.
+  eapply PrimIter.iterate_prop with
+    (Q := fun lm => ana_code lm false FStop (fn_code f) = (lm, false))
+    (P := fun (lm: labelmap) => True); eauto.
+  intros. unfold ana_iter.
+  destruct (ana_code a false FStop (fn_code f)) as (lm1, ch1) eqn:ANA.
+  destruct ch1. auto. exploit ana_code_false; eauto. intros [A B]. congruence. 
+Qed.
+
+Remark wt_ana_code_cons:
+  forall f lm fs i c,
+  ana_code lm false fs (i :: c) = (lm, false) ->
+  wt_code f lm fs (i :: c) = true ->
+  exists fs', ana_code lm false fs' c = (lm, false) /\ wt_code f lm fs' c = true.
+Proof.
+  simpl; intros; destruct i; InvBooleans; try (econstructor; split; eassumption).
+- destruct (is_move_operation o l).
+  InvBooleans; econstructor; eauto.
+  destruct (type_of_operation o); InvBooleans; econstructor; eauto.
+- destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL. 
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  exploit update_label_false; eauto. intros (C & D & E); subst.
+  econstructor; eauto.
+- destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL. 
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  econstructor; eauto.
+- destruct (update_label l0 fs lm) as [[fs1 lm1] ch1] eqn:UL. 
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  econstructor; eauto.
+- destruct (update_labels l fs lm) as [lm1 ch1] eqn:UL. 
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  econstructor; eauto.
+Qed.
+
+Lemma wt_find_label_rec:
+  forall f lm lbl c' c fs,
+  find_label lbl c = Some c' ->
+  ana_code lm false fs c = (lm, false) ->
+  wt_code f lm fs c = true ->
+  ana_code lm false (PTree.get lbl lm) c' = (lm, false) /\ wt_code f lm (PTree.get lbl lm) c' = true.
+Proof.
+  induction c; intros.
+- discriminate.
+- simpl in H. specialize (is_label_correct lbl a). destruct (is_label lbl a); intros IL.
+  + subst a. inv H. simpl in *. 
+    destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
+    exploit ana_code_false; eauto. intros [A B]; subst.
+    exploit update_label_false; eauto. intros (C & D & E). subst.
+    InvBooleans. auto.
+  + exploit wt_ana_code_cons; eauto. intros (fs' & A & B).
+    eapply IHc; eauto. 
+Qed.
+
+Lemma wt_find_label:
+  forall f lm lbl c,
+  ana_function f = Some lm ->
+  wt_funcode f lm = true ->
+  find_label lbl f.(fn_code) = Some c ->
+  ana_code lm false (PTree.get lbl lm) c = (lm, false) /\ wt_code f lm (PTree.get lbl lm) c = true.
+Proof.
+  intros. eapply wt_find_label_rec; eauto. apply ana_function_inv; auto. 
+Qed.
+
+(** * Soundness of the type system *)
 
 Require Import Values.
+Require Import Globalenvs.
+Require Import Memory.
+
+Module LSF := FSetFacts.Facts(Locset).
+
+(** ** Typing the run-time state *)
+
+Inductive wt_locset: singlefloats -> locset -> Prop :=
+  | wt_locset_intro: forall s ls
+      (TY: forall l, Val.has_type (ls l) (Loc.type l))
+      (SINGLE: forall l, Locset.In l s -> Val.has_type (ls l) Tsingle),
+    wt_locset (Some s) ls.
+
+Lemma wt_mreg:
+  forall fs ls r, wt_locset fs ls -> Val.has_type (ls (R r)) (mreg_type r).
+Proof.
+  intros. inv H. apply (TY (R r)). 
+Qed.
+
+Lemma wt_loc_type:
+  forall fs ls l, wt_locset fs ls -> Val.has_type (ls l) (loc_type fs l).
+Proof.
+  intros. inv H. unfold loc_type, FSmem.
+  destruct (typ_eq (Loc.type l) Tfloat); simpl; auto.
+  destruct (Locset.mem l s) eqn:MEM; auto. 
+  apply SINGLE. apply Locset.mem_2; auto.
+Qed.
+
+Lemma loc_type_subtype:
+  forall fs l, subtype (loc_type fs l) (Loc.type l) = true.
+Proof.
+  unfold loc_type; intros. destruct (typ_eq (Loc.type l) Tfloat); simpl.
+  rewrite e. destruct (FSmem l fs); auto. 
+  destruct (Loc.type l); auto.
+Qed.
+
+Lemma wt_locset_top:
+  forall ls,
+  (forall l, Val.has_type (ls l) (Loc.type l)) -> 
+  wt_locset FStop ls.
+Proof.
+  intros; constructor; intros. 
+  auto.
+  eelim Locset.empty_1; eauto.
+Qed.
 
-Definition wt_locset (ls: locset) : Prop :=
-  forall l, Val.has_type (ls l) (Loc.type l).
+Lemma wt_locset_mon:
+  forall fs1 fs2 ls,
+  FSincl fs1 fs2 -> wt_locset fs1 ls -> wt_locset fs2 ls.
+Proof.
+  intros. inv H0; inv H. constructor; intros; auto. 
+Qed.
 
 Lemma wt_setreg:
-  forall ls r v,
-  Val.has_type v (mreg_type r) -> wt_locset ls -> wt_locset (Locmap.set (R r) v ls).
+  forall fs ls r v ty,
+  Val.has_type v ty -> subtype ty (mreg_type r) = true -> wt_locset fs ls ->
+  wt_locset (setreg fs r ty) (Locmap.set (R r) v ls).
+Proof.
+  intros. inv H1. constructor; intros. 
+- unfold Locmap.set. destruct (Loc.eq (R r) l).
+  subst l; simpl. eapply Val.has_subtype; eauto.
+  destruct (Loc.diff_dec (R r) l); simpl; auto.
+- unfold Locmap.set. destruct (Loc.eq (R r) l).
+  destruct (typ_eq ty Tsingle). congruence. 
+  subst l. rewrite LSF.remove_iff in H1. intuition.
+  destruct (Loc.diff_dec (R r) l); simpl; auto.
+  apply SINGLE. 
+  destruct (typ_eq ty Tsingle).
+  rewrite LSF.add_iff in H1; intuition.
+  rewrite LSF.remove_iff in H1; intuition.
+Qed.
+
+Lemma wt_setregs:
+  forall vl tyl rl fs rs,
+  Val.has_type_list vl tyl ->
+  subtype_list tyl (map mreg_type rl) = true ->
+  wt_locset fs rs ->
+  wt_locset (setregs fs rl tyl) (Locmap.setlist (map R rl) vl rs).
+Proof.
+  induction vl; simpl; intros.
+- destruct tyl; try contradiction. destruct rl; try discriminate. 
+  simpl. auto.
+- destruct tyl as [ | ty tyl]; try contradiction. destruct H.
+  destruct rl as [ | r rl]; simpl in H0; try discriminate. InvBooleans.
+  simpl. eapply IHvl; eauto. eapply wt_setreg; eauto. 
+Qed.
+
+Lemma undef_regs_type:
+  forall ty l rl ls,
+  Val.has_type (ls l) ty -> Val.has_type (undef_regs rl ls l) ty.
+Proof.
+  induction rl; simpl; intros.
+- auto.
+- unfold Locmap.set. destruct (Loc.eq (R a) l). red; auto. 
+  destruct (Loc.diff_dec (R a) l); auto. red; auto.
+Qed.
+
+Lemma wt_copyloc_gen:
+  forall fs ls src dst temps,
+  Val.has_type (ls src) (Loc.type dst) ->
+  wt_locset fs ls ->
+  wt_locset (copyloc fs dst src) (Locmap.set dst (ls src) (undef_regs temps ls)).
 Proof.
-  intros; red; intros.
-  unfold Locmap.set. 
-  destruct (Loc.eq (R r) l).
-  subst l; auto.
-  destruct (Loc.diff_dec (R r) l). auto. red. auto.
+  intros. inversion H0; subst. constructor; intros. 
+- unfold Locmap.set. destruct (Loc.eq dst l).
+  subst l. auto.
+  destruct (Loc.diff_dec dst l); simpl; auto.
+  apply undef_regs_type; auto.
+- unfold Locmap.set. destruct (Loc.eq dst l).
+  subst l. destruct (Locset.mem src s) eqn:E.
+  apply SINGLE. apply Locset.mem_2; auto. 
+  rewrite LSF.remove_iff in H1. intuition.
+  destruct (Loc.diff_dec dst l); simpl; auto.
+  apply undef_regs_type; auto.
+  apply SINGLE.
+  destruct (Locset.mem src s).
+  rewrite LSF.add_iff in H1. intuition.
+  rewrite LSF.remove_iff in H1. intuition.
 Qed.
 
-Lemma wt_setstack:
-  forall ls sl ofs ty v,
-  wt_locset ls -> wt_locset (Locmap.set (S sl ofs ty) v ls).
+Lemma wt_copyloc:
+  forall fs ls src dst temps,
+  subtype (Loc.type src) (Loc.type dst) = true ->
+  wt_locset fs ls ->
+  wt_locset (copyloc fs dst src) (Locmap.set dst (ls src) (undef_regs temps ls)).
 Proof.
-  intros; red; intros.
-  unfold Locmap.set. 
-  destruct (Loc.eq (S sl ofs ty) l).
-  subst l. simpl. 
-  generalize (Val.load_result_type (chunk_of_type ty) v). 
-  replace (type_of_chunk (chunk_of_type ty)) with ty. auto.
-  destruct ty; reflexivity.
-  destruct (Loc.diff_dec (S sl ofs ty) l). auto. red. auto.
+  intros. eapply wt_copyloc_gen; eauto.
+  eapply Val.has_subtype; eauto. inv H0; auto. 
 Qed.
 
 Lemma wt_undef_regs:
-  forall rs ls, wt_locset ls -> wt_locset (undef_regs rs ls).
+  forall fs ls temps, wt_locset fs ls -> wt_locset fs (undef_regs temps ls).
 Proof.
-  induction rs; simpl; intros. auto. apply wt_setreg; auto. red; auto.
+  intros. inv H; constructor; intros.
+- apply undef_regs_type; auto. 
+- apply undef_regs_type; auto. 
 Qed.
 
 Lemma wt_call_regs:
-  forall ls, wt_locset ls -> wt_locset (call_regs ls).
+  forall fs ls, wt_locset fs ls -> wt_locset FStop (call_regs ls).
+Proof.
+  intros. inv H. apply wt_locset_top; intros. 
+  unfold call_regs. 
+  destruct l; auto. 
+  destruct sl; try exact I. 
+  change (Loc.type (S Incoming pos ty)) with (Loc.type (S Outgoing pos ty)); auto.
+Qed.
+
+Remark destroyed_at_call_regs_charact:
+  forall l,
+  Locset.In l destroyed_at_call_regs <->
+  match l with R r => In r destroyed_at_call | S _ _ _ => False end.
 Proof.
-  intros; red; intros. unfold call_regs. destruct l. auto.
-  destruct sl.
-  red; auto.
-  change (Loc.type (S Incoming pos ty)) with (Loc.type (S Outgoing pos ty)). auto.
-  red; auto.
+  intros. unfold destroyed_at_call_regs. generalize destroyed_at_call. 
+  induction l0; simpl.
+- rewrite LSF.empty_iff. destruct l; tauto.
+- rewrite LSF.add_iff. rewrite IHl0. destruct l; intuition congruence.
 Qed.
 
 Lemma wt_return_regs:
-  forall caller callee,
-  wt_locset caller -> wt_locset callee -> wt_locset (return_regs caller callee).
+  forall fs caller fs' callee,
+  wt_locset fs caller -> wt_locset fs' callee ->
+  wt_locset (callregs fs) (return_regs caller callee).
 Proof.
-  intros; red; intros.
-  unfold return_regs. destruct l; auto.
+  intros. inv H; inv H0; constructor; intros.
+- unfold return_regs. destruct l; auto. 
   destruct (in_dec mreg_eq r destroyed_at_call); auto.
+- unfold callregs. 
+  rewrite LSF.diff_iff in H. rewrite destroyed_at_call_regs_charact in H. destruct H.
+  unfold return_regs. destruct l.
++ destruct (in_dec mreg_eq r destroyed_at_call). tauto. auto. 
++ auto.
 Qed.
 
 Lemma wt_init:
-  wt_locset (Locmap.init Vundef).
+  forall s, wt_locset (Some s) (Locmap.init Vundef).
 Proof.
-  red; intros. unfold Locmap.init. red; auto.
+  intros; constructor; intros; simpl; auto.
 Qed.
 
-Lemma wt_setlist_result:
-  forall sg v rs,
-  Val.has_type v (proj_sig_res sg) ->
-  wt_locset rs ->
-  wt_locset (Locmap.setlist (map R (loc_result sg)) (encode_long (sig_res sg) v) rs).
+Lemma callregs_setregs_result:
+  forall sg fs,
+  FSincl (setregs fs (loc_result sg) (proj_sig_res' sg)) (callregs fs).
 Proof.
-  unfold loc_result, encode_long, proj_sig_res; intros.
-  destruct (sig_res sg) as [[] | ]; simpl.
-- apply wt_setreg; auto.
-- apply wt_setreg; auto.
-- destruct v; simpl in H; try contradiction;
-  simpl; apply wt_setreg; auto; apply wt_setreg; auto.
-- apply wt_setreg; auto. apply Val.has_subtype with Tsingle; auto. 
-- apply wt_setreg; auto.
+  assert (X: forall rl tyl, setregs None rl tyl = None).
+  {
+    induction rl; destruct tyl; simpl; auto. 
+  }
+  assert (Y: forall rl s tyl,
+             exists s', setregs (Some s) rl tyl = Some s'
+                     /\ forall l, Locset.In l s -> ~In l (map R rl) -> Locset.In l s').
+  {
+    induction rl; simpl; intros.
+  - exists s; split. destruct tyl; auto. tauto.
+  - destruct tyl. exists s; tauto. 
+    destruct (IHrl (if typ_eq t Tsingle
+                    then Locset.add (R a) s
+                    else Locset.remove (R a) s) tyl) as (s1 & A & B).
+    exists s1; split; auto. intros. apply B.
+    destruct (typ_eq t Tsingle). 
+    rewrite LSF.add_iff. tauto. 
+    rewrite LSF.remove_iff. tauto.
+    tauto.
+  }
+  intros. destruct fs as [s|]; simpl.
+  - destruct (Y (loc_result sg) s (proj_sig_res' sg)) as (s' & A & B).
+    rewrite A. constructor. red; intros.
+    rewrite LSF.diff_iff in H. destruct H. apply B. auto. 
+    red; intros. exploit list_in_map_inv; eauto. intros (r & U & V).
+    subst a. elim H0. rewrite destroyed_at_call_regs_charact. 
+    eapply loc_result_caller_save; eauto.
+  - rewrite X. constructor.
 Qed.
 
+Definition wt_fundef (fd: fundef) :=
+  match fd with
+  | Internal f => wt_function f = true
+  | External ef => True
+  end.
+
+Inductive wt_callstack: list stackframe -> singlefloats -> Prop :=
+  | wt_callstack_nil: forall s,
+      wt_callstack nil (Some s)
+  | wt_callstack_cons: forall f sp rs c s fs lm fs0 fs1
+        (WTSTK: wt_callstack s fs0)
+        (ANF: ana_function f = Some lm)
+        (WTF: wt_funcode f lm = true)
+        (ANC: ana_code lm false (callregs fs1) c = (lm, false))
+        (WTC: wt_code f lm (callregs fs1) c = true)
+        (WTRS: wt_locset fs rs)
+        (INCL: FSincl (callregs fs) (callregs fs1)),
+      wt_callstack (Stackframe f sp rs c :: s) fs.
+
+Lemma wt_parent_locset:
+  forall s fs, wt_callstack s fs -> wt_locset fs (parent_locset s).
+Proof.
+  induction 1; simpl.
+- apply wt_init.
+- auto.
+Qed.
+
+Lemma wt_callstack_change_fs:
+  forall s fs, wt_callstack s fs -> wt_callstack s (callregs fs).
+Proof.
+  induction 1.
+- constructor. 
+- econstructor; eauto. 
+  apply wt_locset_mon with fs; auto.
+  + destruct fs; simpl; constructor.
+    red; intros. eapply Locset.diff_1; eauto. 
+  + inv INCL; simpl; constructor.
+    destruct fs; simpl in H0; inv H0. 
+    red; intros. exploit H2; eauto. rewrite ! LSF.diff_iff. tauto.
+Qed.
+
+Inductive wt_state: state -> Prop :=
+  | wt_regular_state: forall s f sp c rs m lm fs fs0
+        (WTSTK: wt_callstack s fs0)
+        (ANF: ana_function f = Some lm)
+        (WTF: wt_funcode f lm = true)
+        (ANC: ana_code lm false fs c = (lm, false))
+        (WTC: wt_code f lm fs c = true)
+        (WTRS: wt_locset fs rs),
+      wt_state (State s f sp c rs m)
+  | wt_call_state: forall s fd rs m fs
+        (WTSTK: wt_callstack s fs)
+        (WTFD: wt_fundef fd)
+        (WTRS: wt_locset fs rs),
+      wt_state (Callstate s fd rs m)
+  | wt_return_state: forall s rs m fs
+        (WTSTK: wt_callstack s fs)
+        (WTRS: wt_locset (callregs fs) rs),
+      wt_state (Returnstate s rs m).
+
+(** ** Preservation of state typing by transitions *)
+
+Section SOUNDNESS.
+
+Variable prog: program.
+Let ge := Genv.globalenv prog.
+
+Hypothesis wt_prog:
+  forall i fd, In (i, Gfun fd) prog.(prog_defs) -> wt_fundef fd.
+
+Lemma wt_find_function:
+  forall ros rs f, find_function ge ros rs = Some f -> wt_fundef f.
+Proof.
+  intros. 
+  assert (X: exists i, In (i, Gfun f) prog.(prog_defs)).
+  {
+    destruct ros as [r | s]; simpl in H.
+    eapply Genv.find_funct_inversion; eauto. 
+    destruct (Genv.find_symbol ge s) as [b|]; try discriminate.
+    eapply Genv.find_funct_ptr_inversion; eauto.
+  }
+  destruct X as [i IN]. eapply wt_prog; eauto. 
+Qed.
+
+Theorem step_type_preservation:
+  forall S1 t S2, step ge S1 t S2 -> wt_state S1 -> wt_state S2.
+Proof.
+  induction 1; intros WTS; inv WTS.
+- (* getstack *)
+  simpl in *; InvBooleans. 
+  econstructor; eauto.
+  apply wt_copyloc; auto.
+- (* setstack *)
+  simpl in *; InvBooleans. 
+  econstructor; eauto.
+  apply wt_copyloc_gen; auto.
+  eapply Val.has_subtype; eauto. eapply wt_loc_type; eauto.
+- (* op *)
+  simpl in *. destruct (is_move_operation op args) as [src | ] eqn:ISMOVE.
+  + (* move *)
+    InvBooleans. exploit is_move_operation_correct; eauto. intros [EQ1 EQ2]; subst. 
+    simpl in H. inv H.
+    econstructor; eauto. 
+    apply wt_copyloc; auto. simpl. rewrite H0. 
+    destruct (mreg_type res); auto.
+  + (* other ops *) 
+    destruct (type_of_operation op) as [ty_args ty_res] eqn:TYOP. InvBooleans.
+    econstructor; eauto.
+    apply wt_setreg; auto. 
+    change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. 
+    eapply type_of_operation_sound; eauto. 
+    red; intros; subst op. simpl in ISMOVE. 
+    destruct args; try discriminate. destruct args; discriminate. 
+    apply wt_undef_regs; auto.
+- (* load *)
+  simpl in *; InvBooleans. 
+  econstructor; eauto.
+  apply wt_setreg. 
+  destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
+  auto. 
+  apply wt_undef_regs; auto.
+- (* store *)
+  simpl in *; InvBooleans. 
+  econstructor; eauto.
+  apply wt_undef_regs; auto.
+- (* call *)
+  simpl in *; InvBooleans.
+  econstructor; eauto. econstructor; eauto.
+  destruct (callregs fs); constructor. red; auto. 
+  eapply wt_find_function; eauto.
+- (* tailcall *)
+  simpl in *; InvBooleans.
+  econstructor. apply wt_callstack_change_fs; eauto. 
+  eapply wt_find_function; eauto.
+  eapply wt_return_regs. apply wt_parent_locset; auto. eauto.
+- (* builtin *)
+  simpl in *; InvBooleans.
+  econstructor; eauto.
+  apply wt_setregs. 
+  eapply external_call_well_typed'; eauto.
+  auto.
+  apply wt_undef_regs; auto.
+- (* annot *)
+  simpl in *; InvBooleans.
+  econstructor; eauto. 
+- (* label *)
+  simpl in *.
+  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  exploit update_label_false; eauto. intros (A & B & C); subst.
+  econstructor; eauto.
+  eapply wt_locset_mon; eauto.
+- (* goto *)
+  simpl in *.
+  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  exploit update_label_false; eauto. intros (A & B & C); subst.
+  exploit wt_find_label; eauto. intros [P Q].
+  econstructor; eauto. 
+  eapply wt_locset_mon; eauto.
+- (* cond branch, taken *)
+  simpl in *.
+  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  exploit update_label_false; eauto. intros (A & B & C); subst.
+  exploit wt_find_label; eauto. intros [P Q].
+  econstructor; eauto. 
+  eapply wt_locset_mon. eauto. 
+  apply wt_undef_regs; auto.
+- (* cond branch, not taken *)
+  simpl in *.
+  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  exploit update_label_false; eauto. intros (A & B & C); subst.
+  econstructor. eauto. eauto. eauto. eauto. eauto.  
+  apply wt_undef_regs; auto.
+- (* jumptable *)
+  simpl in *.
+  destruct (update_labels tbl fs lm) as [lm1 ch1] eqn:UL.
+  exploit ana_code_false; eauto. intros [A B]; subst.
+  exploit update_labels_false; eauto. intros (A & B); subst.
+  exploit wt_find_label; eauto. intros [P Q].
+  econstructor; eauto.
+  apply wt_undef_regs. eapply wt_locset_mon; eauto. apply A. eapply list_nth_z_in; eauto. 
+- (* return *)
+  econstructor. eauto. 
+  eapply wt_return_regs. 
+  apply wt_parent_locset; auto.
+  eauto.
+- (* internal function *)
+  simpl in WTFD. unfold wt_function in WTFD.
+  destruct (ana_function f) as [lm|] eqn:ANF; try discriminate.
+  econstructor. eauto. eauto. eauto. apply ana_function_inv; auto. exact WTFD. 
+  apply wt_undef_regs. eapply wt_call_regs; eauto. 
+- (* external function *)
+  econstructor. eauto. 
+  eapply wt_locset_mon.
+  eapply callregs_setregs_result. 
+  eapply wt_setregs.
+  eapply external_call_well_typed'; eauto.
+  unfold proj_sig_res', loc_result. destruct (sig_res (ef_sig ef) )as [[] | ]; auto.
+  auto.
+- (* return *)
+  inv WTSTK. econstructor; eauto. 
+  apply wt_locset_mon with (callregs fs); auto. 
+Qed.
+
+Theorem wt_initial_state:
+  forall S, initial_state prog S -> wt_state S.
+Proof.
+  induction 1. econstructor. 
+  apply wt_callstack_nil with (s := Locset.empty). 
+  unfold ge0 in H1. exploit Genv.find_funct_ptr_inversion; eauto.
+  intros [id IN]. eapply wt_prog; eauto. 
+  apply wt_init.
+Qed.
+
+End SOUNDNESS.
+
+(** Properties of well-typed states that are used in [Stackingproof]. *)
+
+Lemma wt_state_getstack:
+  forall s f sp sl ofs ty rd c rs m,
+  wt_state (State s f sp (Lgetstack sl ofs ty rd :: c) rs m) ->
+  slot_valid f sl ofs ty = true.
+Proof.
+  intros. inv H. simpl in WTC; InvBooleans. auto.
+Qed.
+
+Lemma wt_state_setstack:
+  forall s f sp sl ofs ty r c rs m,
+  wt_state (State s f sp (Lsetstack r sl ofs ty :: c) rs m) ->
+  Val.has_type (rs (R r)) ty /\ slot_valid f sl ofs ty = true /\ slot_writable sl = true.
+Proof.
+  intros. inv H. simpl in WTC; InvBooleans. intuition. 
+  eapply Val.has_subtype; eauto. eapply wt_loc_type; eauto. 
+Qed.
+
+Lemma wt_state_tailcall:
+  forall s f sp sg ros c rs m,
+  wt_state (State s f sp (Ltailcall sg ros :: c) rs m) ->
+  size_arguments sg = 0.
+Proof.
+  intros. inv H. simpl in WTC; InvBooleans. auto.
+Qed.
+
+Lemma wt_state_annot:
+  forall s f sp ef args c rs m,
+  wt_state (State s f sp (Lannot ef args :: c) rs m) ->
+  forallb (loc_valid f) args = true.
+Proof.
+  intros. inv H. simpl in WTC; InvBooleans. auto. 
+Qed.
+
+Lemma wt_callstate_wt_regs:
+  forall s f rs m,
+  wt_state (Callstate s f rs m) ->
+  forall r, Val.has_type (rs (R r)) (mreg_type r).
+Proof.
+  intros. inv H. eapply wt_mreg; eauto.
+Qed.
diff --git a/backend/Locations.v b/backend/Locations.v
index 43ce210..96f1eba 100644
--- a/backend/Locations.v
+++ b/backend/Locations.v
@@ -301,29 +301,16 @@ Module Locmap.
 
   Definition set (l: loc) (v: val) (m: t) : t :=
     fun (p: loc) =>
-      if Loc.eq l p then
-        match l with R r => v | S sl ofs ty => Val.load_result (chunk_of_type ty) v end
-      else if Loc.diff_dec l p then
-        m p
+      if Loc.eq l p then v
+      else if Loc.diff_dec l p then m p
       else Vundef.
 
   Lemma gss: forall l v m,
-    (set l v m) l = 
-    match l with R r => v | S sl ofs ty => Val.load_result (chunk_of_type ty) v end.
+    (set l v m) l = v.
   Proof.
     intros. unfold set. apply dec_eq_true.
   Qed.
 
-  Lemma gss_reg: forall r v m, (set (R r) v m) (R r) = v.
-  Proof.
-    intros. unfold set. rewrite dec_eq_true. auto.
-  Qed.
-
-  Lemma gss_typed: forall l v m, Val.has_type v (Loc.type l) -> (set l v m) l = v.
-  Proof.
-    intros. rewrite gss. destruct l. auto. apply Val.load_result_same; auto. 
-  Qed.
-
   Lemma gso: forall l v m p, Loc.diff l p -> (set l v m) p = m p.
   Proof.
     intros. unfold set. destruct (Loc.eq l p).
@@ -349,11 +336,10 @@ Module Locmap.
   Proof.
     assert (P: forall ll l m, m l = Vundef -> (undef ll m) l = Vundef).
       induction ll; simpl; intros. auto. apply IHll. 
-      unfold set. destruct (Loc.eq a l).
-      destruct a. auto. destruct ty; reflexivity. 
+      unfold set. destruct (Loc.eq a l). auto.
       destruct (Loc.diff_dec a l); auto.
     induction ll; simpl; intros. contradiction. 
-    destruct H. apply P. subst a. apply gss_typed. exact I. 
+    destruct H. apply P. subst a. apply gss.  
     auto.
   Qed.
 
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index 24f8d7b..e8ae7ae 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -14,7 +14,7 @@
 
 Require Import Coqlib.
 Require Import Errors.
-Require Import Subtyping.
+Require Import Unityping.
 Require Import Maps.
 Require Import AST.
 Require Import Op.
@@ -36,7 +36,8 @@ Require Import Conventions.
   is very simple, consisting of the four types [Tint] (for integers
   and pointers), [Tfloat] (for double-precision floats), [Tlong]
   (for 64-bit integers) and [Tsingle] (for single-precision floats).
-  The only subtlety is that [Tsingle] is a subtype of [Tfloat].
+  At the RTL level, we simplify things further by equating [Tsingle]
+  with [Tfloat].
 
   Additionally, we impose that each pseudo-register has the same type
   throughout the function.  This requirement helps with register allocation,
@@ -56,6 +57,11 @@ Require Import Conventions.
   which can work over both integers and floats.
 *)
 
+Definition normalize (ty: typ) : typ :=
+  match ty with Tsingle => Tfloat | _ => ty end.
+
+Definition normalize_list (tyl: list typ) : list typ := map normalize tyl.
+
 Definition regenv := reg -> typ.
 
 Section WT_INSTR.
@@ -73,57 +79,57 @@ Inductive wt_instr : instruction -> Prop :=
       wt_instr (Inop s)
   | wt_Iopmove:
       forall r1 r s,
-      subtype (env r1) (env r) = true ->
+      env r = env r1 ->
       valid_successor s ->
       wt_instr (Iop Omove (r1 :: nil) r s)
   | wt_Iop:
       forall op args res s,
       op <> Omove ->
-      subtype_list (map env args) (fst (type_of_operation op)) = true ->
-      subtype (snd (type_of_operation op)) (env res) = true ->
+      map env args = fst (type_of_operation op) ->
+      env res = normalize (snd (type_of_operation op)) ->
       valid_successor s ->
       wt_instr (Iop op args res s)
   | wt_Iload:
       forall chunk addr args dst s,
-      subtype_list (map env args) (type_of_addressing addr) = true ->
-      subtype (type_of_chunk chunk) (env dst) = true ->
+      map env args = type_of_addressing addr ->
+      env dst = normalize (type_of_chunk chunk) ->
       valid_successor s ->
       wt_instr (Iload chunk addr args dst s)
   | wt_Istore:
       forall chunk addr args src s,
-      subtype_list (map env args) (type_of_addressing addr) = true ->
-      subtype (env src) (type_of_chunk_use chunk) = true ->
+      map env args = type_of_addressing addr ->
+      env src = type_of_chunk_use chunk ->
       valid_successor s ->
       wt_instr (Istore chunk addr args src s)
   | wt_Icall:
       forall sig ros args res s,
-      match ros with inl r => subtype (env r) Tint = true | inr s => True end ->
-      subtype_list (map env args) sig.(sig_args) = true ->
-      subtype (proj_sig_res sig) (env res) = true ->
+      match ros with inl r => env r = Tint | inr s => True end ->
+      map env args = normalize_list sig.(sig_args) ->
+      env res = normalize (proj_sig_res sig) ->
       valid_successor s ->
       wt_instr (Icall sig ros args res s)
   | wt_Itailcall:
       forall sig ros args,
-      match ros with inl r => subtype (env r) Tint = true | inr s => True end ->
+      match ros with inl r => env r = Tint | inr s => True end ->
+      map env args = normalize_list sig.(sig_args) ->
       sig.(sig_res) = funct.(fn_sig).(sig_res) ->
-      subtype_list (map env args) sig.(sig_args) = true ->
       tailcall_possible sig ->
       wt_instr (Itailcall sig ros args)
   | wt_Ibuiltin:
       forall ef args res s,
-      subtype_list (map env args) (ef_sig ef).(sig_args) = true ->
-      subtype (proj_sig_res (ef_sig ef)) (env res) = true ->
+      map env args = normalize_list (ef_sig ef).(sig_args) ->
+      env res = normalize (proj_sig_res (ef_sig ef)) ->
       valid_successor s ->
       wt_instr (Ibuiltin ef args res s)
   | wt_Icond:
       forall cond args s1 s2,
-      subtype_list (map env args) (type_of_condition cond) = true ->
+      map env args = type_of_condition cond ->
       valid_successor s1 ->
       valid_successor s2 ->
       wt_instr (Icond cond args s1 s2)
   | wt_Ijumptable:
       forall arg tbl,
-      subtype (env arg) Tint = true ->
+      env arg = Tint ->
       (forall s, In s tbl -> valid_successor s) ->
       list_length_z tbl * 4 <= Int.max_unsigned ->
       wt_instr (Ijumptable arg tbl)
@@ -133,7 +139,7 @@ Inductive wt_instr : instruction -> Prop :=
   | wt_Ireturn_some:
       forall arg ty,
       funct.(fn_sig).(sig_res) = Some ty ->
-      subtype (env arg) ty = true ->
+      env arg = normalize ty ->
       wt_instr (Ireturn (Some arg)).
 
 End WT_INSTR.
@@ -146,7 +152,7 @@ End WT_INSTR.
 Record wt_function (f: function) (env: regenv): Prop :=
   mk_wt_function {
     wt_params:
-      subtype_list f.(fn_sig).(sig_args) (map env f.(fn_params)) = true;
+      map env f.(fn_params) = normalize_list f.(fn_sig).(sig_args);
     wt_norepet:
       list_norepet f.(fn_params);
     wt_instrs:
@@ -168,8 +174,8 @@ Definition wt_program (p: program): Prop :=
 
 (** * Type inference *)
 
-(** Type inference reuses the generic solver for subtyping constraints
-  defined in module [Subtyping]. *)
+(** Type inference reuses the generic solver for unification constraints
+  defined in module [Unityping]. *)
 
 Module RTLtypes <: TYPE_ALGEBRA.
 
@@ -177,122 +183,9 @@ Definition t := typ.
 Definition eq := typ_eq.
 Definition default := Tint.
 
-Definition sub (t1 t2: typ): Prop := subtype t1 t2 = true.
-
-Lemma sub_refl: forall ty, sub ty ty.
-Proof. unfold sub; destruct ty; auto. Qed.
-
-Lemma sub_trans: forall ty1 ty2 ty3, sub ty1 ty2 -> sub ty2 ty3 -> sub ty1 ty3.
-Proof.
-  unfold sub; intros. destruct ty1; destruct ty2; try discriminate; destruct ty3; auto; discriminate.
-Qed.
-
-Definition sub_dec: forall ty1 ty2, {sub ty1 ty2} + {~sub ty1 ty2}.
-Proof.
-  unfold sub; intros. destruct (subtype ty1 ty2). left; auto. right; congruence.
-Defined.
-
-Definition lub (ty1 ty2: typ) :=
-  match ty1, ty2 with
-  | Tint, Tint => Tint
-  | Tlong, Tlong => Tlong
-  | Tfloat, Tfloat => Tfloat
-  | Tsingle, Tsingle => Tsingle
-  | Tfloat, Tsingle => Tfloat
-  | Tsingle, Tfloat => Tfloat
-  | _, _ => default
-  end.
-
-Lemma lub_left: forall x y z, sub x z -> sub y z -> sub x (lub x y).
-Proof.
-  unfold sub, lub; intros. destruct x; destruct y; auto; destruct z; discriminate.
-Qed.
-
-Lemma lub_right: forall x y z, sub x z -> sub y z -> sub y (lub x y).
-Proof.
-  unfold sub, lub; intros. destruct x; destruct y; auto; destruct z; discriminate.
-Qed.
-
-Lemma lub_min: forall x y z, sub x z -> sub y z -> sub (lub x y) z.
-Proof.
-  unfold sub, lub; intros. destruct x; destruct z; try discriminate; destruct y; auto; discriminate.
-Qed.
-
-Definition glb (ty1 ty2: typ) :=
-  match ty1, ty2 with
-  | Tint, Tint => Tint
-  | Tlong, Tlong => Tlong
-  | Tfloat, Tfloat => Tfloat
-  | Tsingle, Tsingle => Tsingle
-  | Tfloat, Tsingle => Tsingle
-  | Tsingle, Tfloat => Tsingle
-  | _, _ => default
-  end.
-
-Lemma glb_left: forall x y z, sub z x -> sub z y -> sub (glb x y) x.
-Proof.
-  unfold sub, glb; intros. destruct x; destruct y; auto; destruct z; discriminate.
-Qed.
-
-Lemma glb_right: forall x y z, sub z x -> sub z y -> sub (glb x y) y.
-Proof.
-  unfold sub, glb; intros. destruct x; destruct y; auto; destruct z; discriminate.
-Qed.
-
-Lemma glb_max: forall x y z, sub z x -> sub z y -> sub z (glb x y).
-Proof.
-  unfold sub, glb; intros. destruct x; destruct z; try discriminate; destruct y; auto; discriminate.
-Qed.
-
-Definition low_bound (ty: typ) :=
-  match ty with Tfloat => Tsingle | _ => ty end.
-
-Definition high_bound (ty: typ) :=
-  match ty with Tsingle => Tfloat | _ => ty end.
-
-Lemma low_bound_sub: forall t, sub (low_bound t) t.
-Proof.
-  unfold sub; destruct t0; auto.
-Qed.
-
-Lemma low_bound_minorant: forall x y, sub x y -> sub (low_bound y) x.
-Proof.
-  unfold sub; intros. destruct x; destruct y; auto; discriminate.
-Qed.
-
-Lemma high_bound_sub: forall t, sub t (high_bound t).
-Proof.
-  unfold sub; destruct t0; auto.
-Qed.
-
-Lemma high_bound_majorant: forall x y, sub x y -> sub y (high_bound x).
-Proof.
-  unfold sub; intros. destruct x; destruct y; auto; discriminate.
-Qed.
-
-Definition weight (t: typ) :=
-  match t with Tfloat => 1%nat | _ => 0%nat end.
-
-Definition max_weight := 1%nat.
-
-Lemma weight_range: forall t, (weight t <= max_weight)%nat.
-Proof.
-  unfold max_weight; destruct t0; simpl; omega.
-Qed.
-
-Lemma weight_sub: forall x y, sub x y -> (weight x <= weight y)%nat.
-Proof.
-  unfold sub; intros. destruct x; destruct y; simpl; discriminate || omega.
-Qed.
-
-Lemma weight_sub_strict: forall x y, sub x y -> x <> y -> (weight x < weight y)%nat.
-Proof.
-  unfold sub, subtype; intros. destruct x; destruct y; simpl; congruence || omega.
-Qed.
-
 End RTLtypes.
 
-Module S := SubSolver(RTLtypes).
+Module S := UniSolver(RTLtypes).
 
 Section INFERENCE.
 
@@ -318,7 +211,7 @@ Fixpoint check_successors (sl: list node): res unit :=
 
 Definition type_ros (e: S.typenv) (ros: reg + ident) : res S.typenv :=
   match ros with
-  | inl r => S.type_use e r Tint
+  | inl r => S.set e r Tint
   | inr s => OK e
   end.
 
@@ -333,28 +226,28 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
       do x <- check_successor s;
       if is_move op then
         match args with
-        | arg :: nil => do (changed, e') <- S.type_move e arg res; OK e'
+        | arg :: nil => do (changed, e') <- S.move e res arg; OK e'
         | _ => Error (msg "ill-formed move")
         end
       else
        (let (targs, tres) := type_of_operation op in
-        do e1 <- S.type_uses e args targs; S.type_def e1 res tres)
+        do e1 <- S.set_list e args targs; S.set e1 res (normalize tres))
   | Iload chunk addr args dst s =>
       do x <- check_successor s;
-      do e1 <- S.type_uses e args (type_of_addressing addr);
-      S.type_def e1 dst (type_of_chunk chunk)
+      do e1 <- S.set_list e args (type_of_addressing addr);
+      S.set e1 dst (normalize (type_of_chunk chunk))
   | Istore chunk addr args src s =>
       do x <- check_successor s;
-      do e1 <- S.type_uses e args (type_of_addressing addr);
-      S.type_use e1 src (type_of_chunk_use chunk)
+      do e1 <- S.set_list e args (type_of_addressing addr);
+      S.set e1 src (type_of_chunk_use chunk)
   | Icall sig ros args res s =>
       do x <- check_successor s;
       do e1 <- type_ros e ros;
-      do e2 <- S.type_uses e1 args sig.(sig_args);
-      S.type_def e2 res (proj_sig_res sig)
+      do e2 <- S.set_list e1 args (normalize_list sig.(sig_args));
+      S.set e2 res (normalize (proj_sig_res sig))
   | Itailcall sig ros args =>
       do e1 <- type_ros e ros;
-      do e2 <- S.type_uses e1 args sig.(sig_args);
+      do e2 <- S.set_list e1 args (normalize_list sig.(sig_args));
       if opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res) then
         if tailcall_is_possible sig
         then OK e2
@@ -363,45 +256,48 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
   | Ibuiltin ef args res s =>
       let sig := ef_sig ef in
       do x <- check_successor s;
-      do e1 <- S.type_uses e args sig.(sig_args);
-      S.type_def e1 res (proj_sig_res sig)
-  | Icond cond args s1 s2 =>
+      do e1 <- S.set_list e args (normalize_list sig.(sig_args));
+      S.set e1 res (normalize (proj_sig_res sig))
+ | Icond cond args s1 s2 =>
       do x1 <- check_successor s1;
       do x2 <- check_successor s2;
-      S.type_uses e args (type_of_condition cond)
-  | Ijumptable arg tbl =>
+      S.set_list e args (type_of_condition cond)
+ | Ijumptable arg tbl =>
       do x <- check_successors tbl;
-      do e1 <- S.type_use e arg Tint;
-      if zle (list_length_z tbl * 4) Int.max_unsigned then OK e1 else Error(msg "jumptable too big")
+      do e1 <- S.set e arg Tint;
+      if zle (list_length_z tbl * 4) Int.max_unsigned
+      then OK e1
+      else Error(msg "jumptable too big")
   | Ireturn optres =>
       match optres, f.(fn_sig).(sig_res) with
       | None, None => OK e
-      | Some r, Some t => S.type_use e r t
+      | Some r, Some t => S.set e r (normalize t)
       | _, _ => Error(msg "bad return")
       end
   end.
 
 Definition type_code (e: S.typenv): res S.typenv :=
-  PTree.fold
-    (fun re pc i =>
-       match re with
-       | Error _ => re
-       | OK e =>
-           match type_instr e i with
-           | Error msg => Error(MSG "At PC " :: POS pc :: MSG ": " :: msg)
-           | OK e' => OK e'
-           end
-       end)
-    f.(fn_code) (OK e).
+  PTree.fold (fun re pc i =>
+    match re with
+    | Error _ => re
+    | OK e =>
+        match type_instr e i with
+        | Error msg => Error(MSG "At PC " :: POS pc :: MSG ": " :: msg)
+        | OK e' => OK e'
+        end
+    end)
+  f.(fn_code) (OK e).
 
 (** Solve remaining constraints *)
 
-Definition check_params_norepet (params: list reg): res unit :=
-  if list_norepet_dec Reg.eq params then OK tt else Error(msg "duplicate parameters").
+Definition check_params_norepet (params: list reg): res unit := 
+  if list_norepet_dec Reg.eq params
+  then OK tt
+  else Error(msg "duplicate parameters").
 
 Definition type_function : res regenv :=
   do e1 <- type_code S.initial;
-  do e2 <- S.type_defs e1 f.(fn_params) f.(fn_sig).(sig_args);
+  do e2 <- S.set_list e1 f.(fn_params) (normalize_list f.(fn_sig).(sig_args));
   do te <- S.solve e2;
   do x1 <- check_params_norepet f.(fn_params);
   do x2 <- check_successor f.(fn_entrypoint);
@@ -419,10 +315,10 @@ Hint Resolve type_ros_incr: ty.
 
 Lemma type_ros_sound:
   forall e ros e' te, type_ros e ros = OK e' -> S.satisf te e' ->
-  match ros with inl r => subtype (te r) Tint = true | inr s => True end.
+  match ros with inl r => te r = Tint | inr s => True end.
 Proof.
   unfold type_ros; intros. destruct ros. 
-  eapply S.type_use_sound; eauto.
+  eapply S.set_sound; eauto.
   auto.
 Qed.
 
@@ -443,17 +339,6 @@ Proof.
   monadInv H. destruct H0. subst a; eauto with ty. eauto. 
 Qed.
 
-Remark subtype_list_charact:
-  forall tyl1 tyl2,
-  subtype_list tyl1 tyl2 = true <-> list_forall2 RTLtypes.sub tyl1 tyl2.
-Proof.
-  unfold RTLtypes.sub; intros; split; intros.
-  revert tyl1 tyl2 H. induction tyl1; destruct tyl2; simpl; intros; try discriminate.
-  constructor.
-  InvBooleans. constructor; auto. 
-  revert tyl1 tyl2 H. induction 1; simpl. auto. rewrite H; rewrite IHlist_forall2; auto.
-Qed.
-
 Lemma type_instr_incr:
   forall e i e' te,
   type_instr e i = OK e' -> S.satisf te e' -> S.satisf te e.
@@ -489,56 +374,56 @@ Proof.
   + assert (o = Omove) by (unfold is_move in ISMOVE; destruct o; congruence).
     subst o.
     destruct l; try discriminate. destruct l; monadInv EQ0.
-    constructor. eapply S.type_move_sound; eauto. eauto with ty.
+    constructor. eapply S.move_sound; eauto. eauto with ty.
   + destruct (type_of_operation o) as [targs tres] eqn:TYOP. monadInv EQ0.
     apply wt_Iop. 
     unfold is_move in ISMOVE; destruct o; congruence.
-    rewrite TYOP. rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
-    rewrite TYOP. eapply S.type_def_sound; eauto with ty.
+    rewrite TYOP. eapply S.set_list_sound; eauto with ty.
+    rewrite TYOP. eapply S.set_sound; eauto with ty.
     eauto with ty.
 - (* load *)
   constructor.
-  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
-  eapply S.type_def_sound; eauto with ty.
+  eapply S.set_list_sound; eauto with ty.
+  eapply S.set_sound; eauto with ty.
   eauto with ty.
 - (* store *)
   constructor.
-  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
-  eapply S.type_use_sound; eauto with ty.
+  eapply S.set_list_sound; eauto with ty.
+  eapply S.set_sound; eauto with ty.
   eauto with ty.
 - (* call *)
   constructor. 
   eapply type_ros_sound; eauto with ty.
-  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
-  eapply S.type_def_sound; eauto with ty.
+  eapply S.set_list_sound; eauto with ty.
+  eapply S.set_sound; eauto with ty.
   eauto with ty.
 - (* tailcall *)
   destruct (opt_typ_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
   destruct (tailcall_is_possible s) eqn:TCIP; inv EQ2.
   constructor.
   eapply type_ros_sound; eauto with ty. 
+  eapply S.set_list_sound; eauto with ty.
   auto.
-  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
   apply tailcall_is_possible_correct; auto.
 - (* builtin *)
   constructor.
-  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
-  eapply S.type_def_sound; eauto with ty.
+  eapply S.set_list_sound; eauto with ty.
+  eapply S.set_sound; eauto with ty.
   eauto with ty.
 - (* cond *)
   constructor.
-  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
+  eapply S.set_list_sound; eauto with ty.
   eauto with ty.
   eauto with ty.
 - (* jumptable *)
   destruct (zle (list_length_z l * 4) Int.max_unsigned); inv EQ2.
   constructor.
-  eapply S.type_use_sound; eauto.
+  eapply S.set_sound; eauto.
   eapply check_successors_sound; eauto. 
   auto.
 - (* return *)
   simpl in H. destruct o as [r|] eqn: RET; destruct (sig_res (fn_sig f)) as [t|] eqn: RES; try discriminate.
-  econstructor. eauto. eapply S.type_use_sound; eauto with ty.
+  econstructor. eauto. eapply S.set_sound; eauto with ty.
   inv H. constructor. auto. 
 Qed.
 
@@ -575,7 +460,7 @@ Proof.
   assert (SAT1: S.satisf env x) by (eauto with ty).
   constructor.
 - (* type of parameters *)
-  rewrite subtype_list_charact. eapply S.type_defs_sound; eauto.
+  eapply S.set_list_sound; eauto.
 - (* parameters are unique *)
   unfold check_params_norepet in EQ2. 
   destruct (list_norepet_dec Reg.eq (fn_params f)); inv EQ2; auto. 
@@ -590,11 +475,11 @@ Qed.
 Lemma type_ros_complete:
   forall te ros e,
   S.satisf te e ->
-  match ros with inl r => subtype (te r) Tint = true | inr s => True end ->
+  match ros with inl r => te r = Tint | inr s => True end ->
   exists e', type_ros e ros = OK e' /\ S.satisf te e'.
 Proof.
   intros; destruct ros; simpl. 
-  eapply S.type_use_complete; eauto.
+  eapply S.set_complete; eauto.
   exists e; auto.
 Qed.
 
@@ -615,67 +500,60 @@ Proof.
 - (* nop *)
   econstructor; split. rewrite check_successor_complete; simpl; eauto. auto.
 - (* move *)
-  exploit S.type_move_complete; eauto. intros (changed & e' & A & B).
+  exploit S.move_complete; eauto. intros (changed & e' & A & B).
   exists e'; split. rewrite check_successor_complete by auto; simpl. rewrite A; auto. auto.
 - (* other op *)
   destruct (type_of_operation op) as [targ tres]. simpl in *.
-  rewrite subtype_list_charact in H1.
-  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
-  exploit S.type_def_complete. eexact B. eauto. intros [e2 [C D]].
+  exploit S.set_list_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.set_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   replace (is_move op) with false. rewrite A; simpl; rewrite C; auto.
   destruct op; reflexivity || congruence.
 - (* load *)
-  rewrite subtype_list_charact in H0.
-  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
-  exploit S.type_def_complete. eexact B. eauto. intros [e2 [C D]].
+  exploit S.set_list_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.set_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   rewrite A; simpl; rewrite C; auto.
 - (* store *)
-  rewrite subtype_list_charact in H0.
-  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
-  exploit S.type_use_complete. eexact B. eauto. intros [e2 [C D]].
+  exploit S.set_list_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.set_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   rewrite A; simpl; rewrite C; auto.
 - (* call *)
   exploit type_ros_complete. eauto. eauto. intros [e1 [A B]].
-  rewrite subtype_list_charact in H1.
-  exploit S.type_uses_complete. eauto. eauto. intros [e2 [C D]].
-  exploit S.type_def_complete. eexact D. eauto. intros [e3 [E F]].
+  exploit S.set_list_complete. eauto. eauto. intros [e2 [C D]].
+  exploit S.set_complete. eexact D. eauto. intros [e3 [E F]].
   exists e3; split; auto. 
   rewrite check_successor_complete by auto; simpl. 
   rewrite A; simpl; rewrite C; simpl; rewrite E; auto.
 - (* tailcall *)
   exploit type_ros_complete. eauto. eauto. intros [e1 [A B]].
-  rewrite subtype_list_charact in H2.
-  exploit S.type_uses_complete. eauto. eauto. intros [e2 [C D]].
+  exploit S.set_list_complete. eauto. eauto. intros [e2 [C D]].
   exists e2; split; auto. 
   rewrite A; simpl; rewrite C; simpl. 
-  rewrite H1; rewrite dec_eq_true. 
+  rewrite H2; rewrite dec_eq_true. 
   replace (tailcall_is_possible sig) with true; auto. 
   revert H3. unfold tailcall_possible, tailcall_is_possible. generalize (loc_arguments sig). 
   induction l; simpl; intros. auto.
   exploit (H3 a); auto. intros. destruct a; try contradiction. apply IHl.
   intros; apply H3; auto. 
 - (* builtin *)
-  rewrite subtype_list_charact in H0.
-  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
-  exploit S.type_def_complete. eexact B. eauto. intros [e2 [C D]].
+  exploit S.set_list_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.set_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   rewrite A; simpl; rewrite C; auto.
 - (* cond *)
-  rewrite subtype_list_charact in H0.
-  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.set_list_complete. eauto. eauto. intros [e1 [A B]].
   exists e1; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   rewrite check_successor_complete by auto; simpl.
   auto.
 - (* jumptbl *)
-  exploit S.type_use_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.set_complete. eauto. eauto. intros [e1 [A B]].
   exists e1; split; auto.
   replace (check_successors tbl) with (OK tt). simpl. 
   rewrite A; simpl. apply zle_true; auto. 
@@ -685,7 +563,7 @@ Proof.
 - (* return none *)
   rewrite H0. exists e; auto.
 - (* return some *)
-  rewrite H0. apply S.type_use_complete; auto.
+  rewrite H0. apply S.set_complete; auto.
 Qed.
 
 Lemma type_code_complete:
@@ -719,8 +597,7 @@ Proof.
   intros. destruct H. 
   destruct (type_code_complete te S.initial) as (e1 & A & B).
   auto. apply S.satisf_initial. 
-  destruct (S.type_defs_complete te f.(fn_params) f.(fn_sig).(sig_args) e1) as (e2 & C & D); auto.
-  rewrite <- subtype_list_charact; auto.
+  destruct (S.set_list_complete te f.(fn_params) (normalize_list f.(fn_sig).(sig_args)) e1) as (e2 & C & D); auto.
   destruct (S.solve_complete te e2) as (te' & E); auto.
   exists te'; unfold type_function.
   rewrite A; simpl. rewrite C; simpl. rewrite E; simpl. 
@@ -760,6 +637,23 @@ Proof.
   apply H.
 Qed.
 
+Lemma normalize_subtype:
+  forall ty, subtype ty (normalize ty) = true.
+Proof.
+  intros. destruct ty; reflexivity.
+Qed.
+
+Lemma wt_regset_assign2:
+  forall env rs v r ty,
+  wt_regset env rs ->
+  Val.has_type v ty ->
+  env r = normalize ty ->
+  wt_regset env (rs#r <- v).
+Proof.
+  intros. eapply wt_regset_assign; eauto.
+  rewrite H1. eapply Val.has_subtype; eauto. apply normalize_subtype.
+Qed.
+
 Lemma wt_regset_list:
   forall env rs,
   wt_regset env rs ->
@@ -788,11 +682,11 @@ Lemma wt_exec_Iop:
   wt_regset env (rs#res <- v).
 Proof.
   intros. inv H. 
-  simpl in H0. inv H0. apply wt_regset_assign; auto. 
-  eapply Val.has_subtype; eauto. 
-  apply wt_regset_assign; auto.
-  eapply Val.has_subtype; eauto.
+  simpl in H0. inv H0. apply wt_regset_assign; auto.
+  rewrite H4; auto.
+  eapply wt_regset_assign2; auto.
   eapply type_of_operation_sound; eauto.
+  auto.
 Qed.
 
 Lemma wt_exec_Iload:
@@ -803,8 +697,7 @@ Lemma wt_exec_Iload:
   wt_regset env (rs#dst <- v).
 Proof.
   intros. destruct a; simpl in H0; try discriminate. inv H.
-  apply wt_regset_assign; auto. 
-  eapply Val.has_subtype; eauto. 
+  eapply wt_regset_assign2; eauto.
   eapply Mem.load_type; eauto.
 Qed.
 
@@ -816,8 +709,7 @@ Lemma wt_exec_Ibuiltin:
   wt_regset env (rs#res <- vres).
 Proof.
   intros. inv H. 
-  apply wt_regset_assign; auto. 
-  eapply Val.has_subtype; eauto.
+  eapply wt_regset_assign2; eauto. 
   eapply external_call_well_typed; eauto.
 Qed.
 
@@ -828,36 +720,46 @@ Proof.
   intros. inv H. eauto. 
 Qed.
 
-Inductive wt_stackframes: list stackframe -> option typ -> Prop :=
-  | wt_stackframes_nil:
-      wt_stackframes nil (Some Tint)
+Inductive wt_stackframes: list stackframe -> signature -> Prop :=
+  | wt_stackframes_nil: forall sg,
+      sg.(sig_res) = Some Tint ->
+      wt_stackframes nil sg
   | wt_stackframes_cons:
-      forall s res f sp pc rs env tyres,
+      forall s res f sp pc rs env sg,
       wt_function f env ->
       wt_regset env rs ->
-      subtype (match tyres with None => Tint | Some t => t end) (env res) = true ->
-      wt_stackframes s (sig_res (fn_sig f)) ->
-      wt_stackframes (Stackframe res f sp pc rs :: s) tyres.
+      env res = normalize (proj_sig_res sg) ->
+      wt_stackframes s (fn_sig f) ->
+      wt_stackframes (Stackframe res f sp pc rs :: s) sg.
 
 Inductive wt_state: state -> Prop :=
   | wt_state_intro:
       forall s f sp pc rs m env
-        (WT_STK: wt_stackframes s (sig_res (fn_sig f)))
+        (WT_STK: wt_stackframes s (fn_sig f))
         (WT_FN: wt_function f env)
         (WT_RS: wt_regset env rs),
       wt_state (State s f sp pc rs m)
   | wt_state_call:
       forall s f args m,
-      wt_stackframes s (sig_res (funsig f)) ->
+      wt_stackframes s (funsig f) ->
       wt_fundef f ->
-      Val.has_type_list args (sig_args (funsig f)) ->
+      Val.has_type_list args (normalize_list (sig_args (funsig f))) ->
       wt_state (Callstate s f args m)
   | wt_state_return:
-      forall s v m tyres,
-      wt_stackframes s tyres ->
-      Val.has_type v (match tyres with None => Tint | Some t => t end) ->
+      forall s v m sg,
+      wt_stackframes s sg ->
+      Val.has_type v (normalize (proj_sig_res sg)) ->
       wt_state (Returnstate s v m).
 
+Remark wt_stackframes_change_sig:
+  forall s sg1 sg2,
+  sg1.(sig_res) = sg2.(sig_res) -> wt_stackframes s sg1 -> wt_stackframes s sg2.
+Proof.
+  intros. inv H0. 
+- constructor; congruence.
+- econstructor; eauto. rewrite H3. unfold proj_sig_res. rewrite H. auto. 
+Qed.
+
 Section SUBJECT_REDUCTION.
 
 Variable p: program.
@@ -891,7 +793,7 @@ Proof.
     discriminate.
   econstructor; eauto.
   econstructor; eauto. inv WTI; auto. 
-  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list. auto.
+  inv WTI. rewrite <- H8. apply wt_regset_list. auto.
   (* Itailcall *)
   assert (wt_fundef fd).
     destruct ros; simpl in H0.
@@ -902,8 +804,8 @@ Proof.
     exact wt_p. exact H0.
     discriminate.
   econstructor; eauto.
-  inv WTI. rewrite H7; auto. 
-  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list. auto.
+  inv WTI. apply wt_stackframes_change_sig with (fn_sig f); auto.
+  inv WTI. rewrite <- H7. apply wt_regset_list. auto.
   (* Ibuiltin *)
   econstructor; eauto. eapply wt_exec_Ibuiltin; eauto.
   (* Icond *)
@@ -912,18 +814,40 @@ Proof.
   econstructor; eauto.
   (* Ireturn *)
   econstructor; eauto. 
-  inv WTI; simpl. auto. rewrite H2. eapply Val.has_subtype; eauto. 
+  inv WTI; simpl. auto. unfold proj_sig_res; rewrite H2. rewrite <- H3. auto. 
   (* internal function *)
   simpl in *. inv H5.
   econstructor; eauto.
-  inv H1. apply wt_init_regs; auto. eapply Val.has_subtype_list; eauto. 
+  inv H1. apply wt_init_regs; auto. rewrite wt_params0. auto. 
   (* external function *)
   econstructor; eauto. simpl.  
-  change (Val.has_type res (proj_sig_res (ef_sig ef))).
+  change (Val.has_type res (normalize (proj_sig_res (ef_sig ef)))).
+  eapply Val.has_subtype. apply normalize_subtype. 
   eapply external_call_well_typed; eauto.
   (* return *)
   inv H1. econstructor; eauto.
-  apply wt_regset_assign; auto. eapply Val.has_subtype; eauto. 
+  apply wt_regset_assign; auto. rewrite H10; auto. 
+Qed.
+
+Lemma wt_initial_state:
+  forall S, initial_state p S -> wt_state S.
+Proof.
+  intros. inv H. constructor. constructor. rewrite H3; auto. 
+  pattern f. apply Genv.find_funct_ptr_prop with fundef unit p b.
+  exact wt_p. exact H2.
+  rewrite H3. constructor.
+Qed.
+
+Lemma wt_instr_inv:
+  forall s f sp pc rs m i,
+  wt_state (State s f sp pc rs m) ->
+  f.(fn_code)!pc = Some i ->
+  exists env, wt_instr f env i /\ wt_regset env rs.
+Proof.
+  intros. inv H. exists env; split; auto. 
+  inv WT_FN. eauto. 
 Qed.
 
 End SUBJECT_REDUCTION.
+
+  
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index 9b144cb..b69f1ec 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -664,17 +664,12 @@ Record agree_frame (j: meminj) (ls ls0: locset)
 
     (** Permissions on the frame part of the Mach stack block *)
     agree_perm:
-      frame_perm_freeable m' sp';
-
-    (** Current locset is well-typed *)
-    agree_wt_ls:
-      wt_locset ls
+      frame_perm_freeable m' sp'
   }.
 
 Hint Resolve agree_unused_reg agree_locals agree_outgoing agree_incoming
              agree_link agree_retaddr agree_saved_int agree_saved_float
-             agree_valid_linear agree_valid_mach agree_perm
-             agree_wt_ls: stacking.
+             agree_valid_linear agree_valid_mach agree_perm: stacking.
 
 (** Auxiliary predicate used at call points *)
 
@@ -793,19 +788,16 @@ Lemma agree_frame_set_reg:
   forall j ls ls0 m sp m' sp' parent ra r v,
   agree_frame j ls ls0 m sp m' sp' parent ra ->
   mreg_within_bounds b r ->
-   Val.has_type v (Loc.type (R r)) ->
   agree_frame j (Locmap.set (R r) v ls) ls0 m sp m' sp' parent ra.
 Proof.
   intros. inv H; constructor; auto; intros.
   rewrite Locmap.gso. auto. red. intuition congruence.
-  apply wt_setreg; auto.
 Qed.
 
 Lemma agree_frame_set_regs:
   forall j ls0 m sp m' sp' parent ra rl vl ls,
   agree_frame j ls ls0 m sp m' sp' parent ra ->
   (forall r, In r rl -> mreg_within_bounds b r) ->
-  Val.has_type_list vl (map Loc.type (map R rl)) ->
   agree_frame j (Locmap.setlist (map R rl) vl ls) ls0 m sp m' sp' parent ra.
 Proof.
   induction rl; destruct vl; simpl; intros; intuition.
@@ -821,7 +813,7 @@ Lemma agree_frame_undef_regs:
 Proof.
   induction regs; simpl; intros.
   auto.
-  apply agree_frame_set_reg; auto. red; auto.
+  apply agree_frame_set_reg; auto.
 Qed.
 
 Lemma caller_save_reg_within_bounds:
@@ -852,17 +844,18 @@ Lemma agree_frame_set_local:
   forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true ->
+  Val.has_type v ty ->
   val_inject j v v' ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_local ofs ty)) v' = Some m'' ->
   agree_frame j (Locmap.set (S Local ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H3.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H4.
   constructor; auto; intros.
 (* local *)
   unfold Locmap.set.
   destruct (Loc.eq (S Local ofs ty) (S Local ofs0 ty0)).
-  inv e. eapply gss_index_contains_inj'; eauto with stacking.
+  inv e. eapply gss_index_contains_inj; eauto with stacking.
   destruct (Loc.diff_dec (S Local ofs ty) (S Local ofs0 ty0)).
   eapply gso_index_contains_inj. eauto. eauto with stacking. eauto. 
   simpl. simpl in d. intuition.
@@ -883,8 +876,6 @@ Proof.
   eauto with mem.
 (* perm *)
   red; intros. eapply Mem.perm_store_1; eauto.
-(* wt *)
-  apply wt_setstack; auto. 
 Qed.
 
 (** Preservation by assignment to outgoing slot *)
@@ -893,19 +884,20 @@ Lemma agree_frame_set_outgoing:
   forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true ->
+  Val.has_type v ty ->
   val_inject j v v' ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_arg ofs ty)) v' = Some m'' ->
   agree_frame j (Locmap.set (S Outgoing ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H3.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H4.
   constructor; auto; intros.
 (* local *)
   rewrite Locmap.gso. eapply gso_index_contains_inj; eauto with stacking. red; auto.
   red; left; congruence.
 (* outgoing *)
   unfold Locmap.set. destruct (Loc.eq (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
-  inv e. eapply gss_index_contains_inj'; eauto with stacking.
+  inv e. eapply gss_index_contains_inj; eauto with stacking.
   destruct (Loc.diff_dec (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
   eapply gso_index_contains_inj; eauto with stacking.
   red. red in d. intuition. 
@@ -923,8 +915,6 @@ Proof.
   eauto with mem stacking.
 (* perm *)
   red; intros. eapply Mem.perm_store_1; eauto.
-(* wt *)
-  apply wt_setstack; auto. 
 Qed.
 
 (** General invariance property with respect to memory changes. *)
@@ -1051,7 +1041,6 @@ Lemma agree_frame_return:
   forall j ls ls0 m sp m' sp' parent retaddr ls',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   agree_callee_save ls' ls ->
-  wt_locset ls' ->
   agree_frame j ls' ls0 m sp m' sp' parent retaddr.
 Proof.
   intros. red in H0. inv H; constructor; auto; intros.
@@ -1247,7 +1236,7 @@ Hypothesis csregs_typ:
 Hypothesis ls_temp_undef:
   forall r, In r (destroyed_by_setstack ty) -> ls (R r) = Vundef.
 
-Hypothesis wt_ls: wt_locset ls.
+Hypothesis wt_ls: forall r, Val.has_type (ls (R r)) (mreg_type r).
 
 Lemma save_callee_save_regs_correct:
   forall l k rs m,
@@ -1304,7 +1293,8 @@ Proof.
   simpl in H3. destruct (mreg_eq a r). subst r.
   assert (index_contains_inj j m1 sp (mkindex (number a)) (ls (R a))).
     eapply gss_index_contains_inj; eauto.
-    rewrite mkindex_typ. rewrite <- (csregs_typ a). apply wt_ls. auto with coqlib.
+    rewrite mkindex_typ. rewrite <- (csregs_typ a). apply wt_ls. 
+    auto with coqlib.
   destruct H5 as [v' [P Q]].
   exists v'; split; auto. apply C; auto. 
   intros. apply mkindex_inj. apply number_inj; auto with coqlib. 
@@ -1354,7 +1344,8 @@ Qed.
 Lemma save_callee_save_correct:
   forall j ls0 rs sp cs fb k m,
   let ls := LTL.undef_regs destroyed_at_function_entry ls0 in
-  agree_regs j ls rs -> wt_locset ls ->
+  agree_regs j ls rs ->
+  (forall r, Val.has_type (ls (R r)) (mreg_type r)) ->
   frame_perm_freeable m sp ->
   exists rs', exists m',
     star step tge 
@@ -1480,7 +1471,7 @@ Lemma function_prologue_correct:
   forall j ls ls0 ls1 rs rs1 m1 m1' m2 sp parent ra cs fb k,
   agree_regs j ls rs ->
   agree_callee_save ls ls0 ->
-  wt_locset ls ->
+  (forall r, Val.has_type (ls (R r)) (mreg_type r)) ->
   ls1 = LTL.undef_regs destroyed_at_function_entry (LTL.call_regs ls) ->
   rs1 = undef_regs destroyed_at_function_entry rs ->
   Mem.inject j m1 m1' ->
@@ -1540,7 +1531,7 @@ Proof.
     instantiate (1 := rs1). instantiate (1 := call_regs ls). instantiate (1 := j').
     subst rs1. apply agree_regs_undef_regs. 
     apply agree_regs_call_regs. eapply agree_regs_inject_incr; eauto.
-    apply wt_undef_regs. apply wt_call_regs. auto.
+    intros. apply undef_regs_type. simpl. apply WTREGS.
     eexact PERM4.
   rewrite <- LS1.
   intros [rs' [m5' [STEPS [ICS [FCS [OTHERS [STORES [PERM5 AGREGS']]]]]]]].
@@ -1622,8 +1613,6 @@ Proof.
     exploit Mem.perm_alloc_inv. eexact ALLOC. eauto. rewrite dec_eq_true. auto.
     (* perms *)
     auto.
-    (* wt *)
-    rewrite LS1. apply wt_undef_regs. apply wt_call_regs; auto.
   (* incr *)
   split. auto.
   (* separated *)
@@ -1866,7 +1855,6 @@ Inductive match_stacks (j: meminj) (m m': mem):
       match_stacks j m m' nil nil sg bound bound'
   | match_stacks_cons: forall f sp ls c cs fb sp' ra c' cs' sg bound bound' trf
         (TAIL: is_tail c (Linear.fn_code f))
-        (WTF: wt_function f = true)
         (FINDF: Genv.find_funct_ptr tge fb = Some (Internal trf))
         (TRF: transf_function f = OK trf)
         (TRC: transl_code (make_env (function_bounds f)) c = c')
@@ -2055,16 +2043,6 @@ Qed.
 
 (** Typing properties of [match_stacks]. *)
 
-Lemma match_stacks_wt_locset:
-  forall j m m' cs cs' sg bound bound',
-  match_stacks j m m' cs cs' sg bound bound' ->
-  wt_locset (parent_locset cs).
-Proof.
-  induction 1; simpl.
-  unfold Locmap.init; red; intros; red; auto.
-  inv FRM; auto.
-Qed.
-
 Lemma match_stacks_type_sp:
   forall j m m' cs cs' sg bound bound',
   match_stacks j m m' cs cs' sg bound bound' ->
@@ -2457,7 +2435,6 @@ Inductive match_states: Linear.state -> Mach.state -> Prop :=
         (STACKS: match_stacks j m m' cs cs' f.(Linear.fn_sig) sp sp')
         (TRANSL: transf_function f = OK tf)
         (FIND: Genv.find_funct_ptr tge fb = Some (Internal tf))
-        (WTF: wt_function f = true)
         (AGREGS: agree_regs j ls rs)
         (AGFRAME: agree_frame f j ls (parent_locset cs) m sp m' sp' (parent_sp cs') (parent_ra cs'))
         (TAIL: is_tail c (Linear.fn_code f)),
@@ -2469,7 +2446,6 @@ Inductive match_states: Linear.state -> Mach.state -> Prop :=
         (STACKS: match_stacks j m m' cs cs' (Linear.funsig f) (Mem.nextblock m) (Mem.nextblock m'))
         (TRANSL: transf_fundef f = OK tf)
         (FIND: Genv.find_funct_ptr tge fb = Some tf)
-        (WTLS: wt_locset ls)
         (AGREGS: agree_regs j ls rs)
         (AGLOCS: agree_callee_save ls (parent_locset cs)),
       match_states (Linear.Callstate cs f ls m)
@@ -2478,7 +2454,6 @@ Inductive match_states: Linear.state -> Mach.state -> Prop :=
       forall cs ls m cs' rs m' j sg 
         (MINJ: Mem.inject j m m')
         (STACKS: match_stacks j m m' cs cs' sg (Mem.nextblock m) (Mem.nextblock m'))
-        (WTLS: wt_locset ls)
         (AGREGS: agree_regs j ls rs)
         (AGLOCS: agree_callee_save ls (parent_locset cs)),
       match_states (Linear.Returnstate cs ls m)
@@ -2486,37 +2461,40 @@ Inductive match_states: Linear.state -> Mach.state -> Prop :=
 
 Theorem transf_step_correct:
   forall s1 t s2, Linear.step ge s1 t s2 ->
-  forall s1' (MS: match_states s1 s1'),
+  forall (WTS: wt_state s1) s1' (MS: match_states s1 s1'),
   exists s2', plus step tge s1' t s2' /\ match_states s2 s2'.
 Proof.
+(*
   assert (USEWTF: forall f i c,
           wt_function f = true -> is_tail (i :: c) (Linear.fn_code f) ->
           wt_instr f i = true).
     intros. unfold wt_function, wt_code in H. rewrite forallb_forall in H.
     apply H. eapply is_tail_in; eauto. 
+*)
   induction 1; intros;
   try inv MS;
   try rewrite transl_code_eq;
-  try (generalize (USEWTF _ _ _ WTF TAIL); intro WTI; simpl in WTI; InvBooleans);
   try (generalize (function_is_within_bounds f _ (is_tail_in TAIL));
        intro BOUND; simpl in BOUND);
   unfold transl_instr.
 
-  (* Lgetstack *)
-  destruct BOUND. unfold destroyed_by_getstack; destruct sl.
-  (* Lgetstack, local *)
+- (* Lgetstack *)
+  destruct BOUND.
+  exploit wt_state_getstack; eauto. intros SV.
+  unfold destroyed_by_getstack; destruct sl.
++ (* Lgetstack, local *)
   exploit agree_locals; eauto. intros [v [A B]].
   econstructor; split.
   apply plus_one. apply exec_Mgetstack. 
   eapply index_contains_load_stack; eauto.
   econstructor; eauto with coqlib.
   apply agree_regs_set_reg; auto.
-  apply agree_frame_set_reg; auto. simpl. eapply Val.has_subtype; eauto. eapply agree_wt_ls; eauto.
-  (* Lgetstack, incoming *)
-  unfold slot_valid in H0. InvBooleans.
+  apply agree_frame_set_reg; auto.
++ (* Lgetstack, incoming *)
+  unfold slot_valid in SV. InvBooleans.
   exploit incoming_slot_in_parameters; eauto. intros IN_ARGS.
   inversion STACKS; clear STACKS.
-  elim (H7 _ IN_ARGS).
+  elim (H6 _ IN_ARGS).
   subst bound bound' s cs'.
   exploit agree_outgoing. eexact FRM. eapply ARGS; eauto.
   exploit loc_arguments_acceptable; eauto. intros [A B].
@@ -2537,8 +2515,7 @@ Proof.
   eapply agree_frame_set_reg; eauto. eapply agree_frame_set_reg; eauto. 
   apply caller_save_reg_within_bounds. 
   apply temp_for_parent_frame_caller_save.
-  simpl. eapply Val.has_subtype; eauto. eapply agree_wt_ls; eauto.
-  (* Lgetstack, outgoing *)
++ (* Lgetstack, outgoing *)
   exploit agree_outgoing; eauto. intros [v [A B]].
   econstructor; split.
   apply plus_one. apply exec_Mgetstack. 
@@ -2546,66 +2523,55 @@ Proof.
   econstructor; eauto with coqlib.
   apply agree_regs_set_reg; auto.
   apply agree_frame_set_reg; auto.
-  simpl. eapply Val.has_subtype; eauto. eapply agree_wt_ls; eauto.
 
-  (* Lsetstack *)
+- (* Lsetstack *)
+  exploit wt_state_setstack; eauto. intros (VTY & SV & SW). 
   set (idx := match sl with
               | Local => FI_local ofs ty
               | Incoming => FI_link (*dummy*)
               | Outgoing => FI_arg ofs ty
               end).
   assert (index_valid f idx).
-    unfold idx; destruct sl.
+  { unfold idx; destruct sl.
     apply index_local_valid; auto.
     red; auto.
-    apply index_arg_valid; auto.
+    apply index_arg_valid; auto. }
   exploit store_index_succeeds; eauto. eapply agree_perm; eauto.
   instantiate (1 := rs0 src). intros [m1' STORE].
   econstructor; split.
-  apply plus_one. destruct sl; simpl in H0.
+  apply plus_one. destruct sl; simpl in SW.
     econstructor. eapply store_stack_succeeds with (idx := idx); eauto. eauto.
     discriminate.
     econstructor. eapply store_stack_succeeds with (idx := idx); eauto. auto. 
   econstructor. 
   eapply Mem.store_outside_inject; eauto. 
     intros. exploit agree_inj_unique; eauto. intros [EQ1 EQ2]; subst b' delta.
-    rewrite size_type_chunk in H4.
+    rewrite size_type_chunk in H2.
     exploit offset_of_index_disj_stack_data_2; eauto.
     exploit agree_bounds. eauto. apply Mem.perm_cur_max. eauto. 
     omega.
   apply match_stacks_change_mach_mem with m'; auto.
-  eauto with mem. eauto with mem. intros. rewrite <- H3; eapply Mem.load_store_other; eauto. left; apply Plt_ne; auto. 
+  eauto with mem. eauto with mem. intros. rewrite <- H1; eapply Mem.load_store_other; eauto. left; apply Plt_ne; auto. 
   eauto. eauto. auto. 
   apply agree_regs_set_slot. apply agree_regs_undef_regs; auto. 
   destruct sl.
-    eapply agree_frame_set_local. eapply agree_frame_undef_locs; eauto.
-    apply destroyed_by_setstack_caller_save. auto. auto. apply AGREGS. 
-    assumption. 
-    simpl in H0; discriminate.
-    eapply agree_frame_set_outgoing. eapply agree_frame_undef_locs; eauto.
-    apply destroyed_by_setstack_caller_save. auto. auto. apply AGREGS.
+  + eapply agree_frame_set_local. eapply agree_frame_undef_locs; eauto.
+    apply destroyed_by_setstack_caller_save. auto. auto. auto. apply AGREGS. 
     assumption. 
-  eauto with coqlib.
+  + simpl in SW; discriminate.
+  + eapply agree_frame_set_outgoing. eapply agree_frame_undef_locs; eauto.
+    apply destroyed_by_setstack_caller_save. auto. auto. auto. apply AGREGS.
+    assumption.
+  + eauto with coqlib.
 
-  (* Lop *)
-  assert (Val.has_type v (mreg_type res)).
-    destruct (is_move_operation op args) as [arg|] eqn:?.
-    exploit is_move_operation_correct; eauto. intros [EQ1 EQ2]; subst op args. 
-    eapply Val.has_subtype; eauto. simpl in H; inv H. eapply agree_wt_ls; eauto. 
-    destruct (type_of_operation op) as [targs tres] eqn:TYOP. 
-    eapply Val.has_subtype; eauto. 
-    replace tres with (snd (type_of_operation op)).
-    eapply type_of_operation_sound; eauto.
-    red; intros. subst op. simpl in Heqo.
-    destruct args; simpl in H. discriminate. destruct args. discriminate. simpl in H. discriminate.
-    rewrite TYOP; auto. 
+- (* Lop *)
   assert (exists v',
           eval_operation ge (Vptr sp' Int.zero) (transl_op (make_env (function_bounds f)) op) rs0##args m' = Some v'
        /\ val_inject j v v').
   eapply eval_operation_inject; eauto.
   eapply match_stacks_preserves_globals; eauto.
   eapply agree_inj; eauto. eapply agree_reglist; eauto.
-  destruct H1 as [v' [A B]].
+  destruct H0 as [v' [A B]].
   econstructor; split. 
   apply plus_one. econstructor. 
   instantiate (1 := v'). rewrite <- A. apply eval_operation_preserved. 
@@ -2616,7 +2582,7 @@ Proof.
   apply agree_frame_set_reg; auto. apply agree_frame_undef_locs; auto.
   apply destroyed_by_op_caller_save.
 
-  (* Lload *)
+- (* Lload *)
   assert (exists a',
           eval_addressing ge (Vptr sp' Int.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
        /\ val_inject j a a').
@@ -2633,9 +2599,8 @@ Proof.
   apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto. 
   apply agree_frame_set_reg. apply agree_frame_undef_locs; auto.
   apply destroyed_by_load_caller_save. auto. 
-  simpl. eapply Val.has_subtype; eauto. destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
 
-  (* Lstore *)
+- (* Lstore *)
   assert (exists a',
           eval_addressing ge (Vptr sp' Int.zero) (transl_addr (make_env (function_bounds f)) addr) rs0##args = Some a'
        /\ val_inject j a a').
@@ -2656,7 +2621,7 @@ Proof.
   eapply agree_frame_parallel_stores; eauto.
   apply destroyed_by_store_caller_save. 
 
-  (* Lcall *)
+- (* Lcall *)
   exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]].
   exploit is_tail_transf_function; eauto. intros IST.
   rewrite transl_code_eq in IST. simpl in IST.
@@ -2672,10 +2637,9 @@ Proof.
     apply loc_arguments_bounded; auto.
   eapply agree_valid_linear; eauto.
   eapply agree_valid_mach; eauto.
-  eapply agree_wt_ls; eauto. 
   simpl; red; auto.
 
-  (* Ltailcall *)
+- (* Ltailcall *)
   exploit function_epilogue_correct; eauto.
   intros [rs1 [m1' [P [Q [R [S [T [U V]]]]]]]].
   exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]].
@@ -2688,14 +2652,13 @@ Proof.
   apply match_stacks_change_mach_mem with m'0.
   auto. 
   eauto with mem. intros. eapply Mem.perm_free_1; eauto. left; apply Plt_ne; auto. 
-  intros. rewrite <- H3. eapply Mem.load_free; eauto. left; apply Plt_ne; auto. 
+  intros. rewrite <- H1. eapply Mem.load_free; eauto. left; apply Plt_ne; auto. 
   eauto with mem. intros. eapply Mem.perm_free_3; eauto. 
   apply Plt_Ple. change (Mem.valid_block m' stk). eapply Mem.valid_block_free_1; eauto. eapply agree_valid_linear; eauto. 
   apply Plt_Ple. change (Mem.valid_block m1' sp'). eapply Mem.valid_block_free_1; eauto. eapply agree_valid_mach; eauto. 
-  apply zero_size_arguments_tailcall_possible; auto. 
-  apply wt_return_regs; auto. eapply match_stacks_wt_locset; eauto. eapply agree_wt_ls; eauto.
+  apply zero_size_arguments_tailcall_possible. eapply wt_state_tailcall; eauto. 
 
-  (* Lbuiltin *)
+- (* Lbuiltin *)
   exploit external_call_mem_inject'; eauto. 
     eapply match_stacks_preserves_globals; eauto.
     eapply agree_reglist; eauto. 
@@ -2717,12 +2680,9 @@ Proof.
   intros. inv H; eapply external_call_max_perm; eauto. eapply agree_valid_linear; eauto.
   eapply external_call_valid_block'; eauto.
   eapply agree_valid_mach; eauto.
-  simpl. rewrite list_map_compose.
-  change (fun x => Loc.type (R x)) with mreg_type.
-  eapply Val.has_subtype_list; eauto. eapply external_call_well_typed'; eauto.
 
-  (* Lannot *)
-  exploit transl_annot_params_correct; eauto.
+- (* Lannot *)
+  exploit transl_annot_params_correct; eauto. eapply wt_state_annot; eauto.
   intros [vargs' [P Q]]. 
   exploit external_call_mem_inject'; eauto. 
     eapply match_stacks_preserves_globals; eauto.
@@ -2743,49 +2703,49 @@ Proof.
   eapply external_call_valid_block'; eauto.
   eapply agree_valid_mach; eauto.
 
-  (* Llabel *)
+- (* Llabel *)
   econstructor; split.
   apply plus_one; apply exec_Mlabel.
   econstructor; eauto with coqlib.
 
-  (* Lgoto *)
+- (* Lgoto *)
   econstructor; split.
   apply plus_one; eapply exec_Mgoto; eauto.
   apply transl_find_label; eauto.
   econstructor; eauto. 
   eapply find_label_tail; eauto.
 
-  (* Lcond, true *)
+- (* Lcond, true *)
   econstructor; split.
   apply plus_one. eapply exec_Mcond_true; eauto.
   eapply eval_condition_inject; eauto. eapply agree_reglist; eauto.
   eapply transl_find_label; eauto.
-  econstructor. eauto. eauto. eauto. eauto. eauto. 
+  econstructor. eauto. eauto. eauto. eauto.
   apply agree_regs_undef_regs; auto. 
   apply agree_frame_undef_locs; auto. apply destroyed_by_cond_caller_save. 
   eapply find_label_tail; eauto.
 
-  (* Lcond, false *)
+- (* Lcond, false *)
   econstructor; split.
   apply plus_one. eapply exec_Mcond_false; eauto.
   eapply eval_condition_inject; eauto. eapply agree_reglist; eauto.
-  econstructor. eauto. eauto. eauto. eauto. eauto. 
+  econstructor. eauto. eauto. eauto. eauto.
   apply agree_regs_undef_regs; auto. 
   apply agree_frame_undef_locs; auto. apply destroyed_by_cond_caller_save. 
   eauto with coqlib.
 
-  (* Ljumptable *)
+- (* Ljumptable *)
   assert (rs0 arg = Vint n).
-    generalize (AGREGS arg). rewrite H. intro IJ; inv IJ; auto.
+  { generalize (AGREGS arg). rewrite H. intro IJ; inv IJ; auto. }
   econstructor; split.
   apply plus_one; eapply exec_Mjumptable; eauto. 
   apply transl_find_label; eauto.
-  econstructor. eauto. eauto. eauto. eauto. eauto.
+  econstructor. eauto. eauto. eauto. eauto.
   apply agree_regs_undef_regs; auto.
   apply agree_frame_undef_locs; auto. apply destroyed_by_jumptable_caller_save.
   eapply find_label_tail; eauto.
 
-  (* Lreturn *)
+- (* Lreturn *)
   exploit function_epilogue_correct; eauto.
   intros [rs1 [m1' [P [Q [R [S [T [U V]]]]]]]].
   econstructor; split.
@@ -2801,13 +2761,12 @@ Proof.
   eauto with mem. intros. eapply Mem.perm_free_3; eauto. 
   apply Plt_Ple. change (Mem.valid_block m' stk). eapply Mem.valid_block_free_1; eauto. eapply agree_valid_linear; eauto. 
   apply Plt_Ple. change (Mem.valid_block m1' sp'). eapply Mem.valid_block_free_1; eauto. eapply agree_valid_mach; eauto. 
-  apply wt_return_regs; auto. eapply match_stacks_wt_locset; eauto. eapply agree_wt_ls; eauto.
 
-  (* internal function *)
+- (* internal function *)
   revert TRANSL. unfold transf_fundef, transf_partial_fundef.
   caseEq (transf_function f); simpl; try congruence.
   intros tfn TRANSL EQ. inversion EQ; clear EQ; subst tf.
-  exploit function_prologue_correct; eauto.
+  exploit function_prologue_correct; eauto. eapply wt_callstate_wt_regs; eauto.
   eapply match_stacks_type_sp; eauto. 
   eapply match_stacks_type_retaddr; eauto.
   intros [j' [rs' [m2' [sp' [m3' [m4' [m5' [A [B [C [D [E [F [G [J [K L]]]]]]]]]]]]]]]].
@@ -2828,10 +2787,9 @@ Proof.
   intros. eapply stores_in_frame_perm; eauto with mem.
   intros. rewrite <- H1. transitivity (Mem.load chunk m2' b ofs). eapply stores_in_frame_contents; eauto.
   eapply Mem.load_alloc_unchanged; eauto. red. congruence.
-  eapply transf_function_well_typed; eauto.
   auto with coqlib.
 
-  (* external function *)
+- (* external function *)
   simpl in TRANSL. inversion TRANSL; subst tf.
   exploit transl_external_arguments; eauto. intros [vl [ARGS VINJ]].
   exploit external_call_mem_inject'; eauto. 
@@ -2846,11 +2804,10 @@ Proof.
   inv H0; inv A. eapply match_stack_change_extcall; eauto. apply Ple_refl. apply Ple_refl. 
   eapply external_call_nextblock'; eauto.
   eapply external_call_nextblock'; eauto.
-  inv H0. apply wt_setlist_result. eapply external_call_well_typed; eauto. auto.
   apply agree_regs_set_regs; auto. apply agree_regs_inject_incr with j; auto. 
   apply agree_callee_save_set_result; auto. 
 
-  (* return *)
+- (* return *)
   inv STACKS. simpl in AGLOCS.  
   econstructor; split.
   apply plus_one. apply exec_return. 
@@ -2879,7 +2836,6 @@ Proof.
     intros. change (Mem.valid_block m0 b0). eapply Genv.find_funct_ptr_not_fresh; eauto.
     intros. change (Mem.valid_block m0 b0). eapply Genv.find_var_info_not_fresh; eauto.
   rewrite H3. red; intros. contradiction.
-  apply wt_init.
   unfold Locmap.init. red; intros; auto.
   unfold parent_locset. red; auto.
 Qed.
@@ -2893,14 +2849,31 @@ Proof.
   econstructor; eauto. 
 Qed.
 
+Lemma wt_prog:
+  forall i fd, In (i, Gfun fd) prog.(prog_defs) -> wt_fundef fd.
+Proof.
+  intros. exploit transform_partial_program_succeeds; eauto. 
+  intros [tfd TF]. destruct fd; simpl in *. 
+- monadInv TF. unfold transf_function in EQ.
+  destruct (wt_function f). auto. discriminate.  
+- auto.
+Qed.
+
 Theorem transf_program_correct:
   forward_simulation (Linear.semantics prog) (Mach.semantics return_address_offset tprog).
 Proof.
-  eapply forward_simulation_plus.
-  eexact symbols_preserved.
-  eexact transf_initial_states.
-  eexact transf_final_states.
-  eexact transf_step_correct. 
+  set (ms := fun s s' => wt_state s /\ match_states s s').
+  eapply forward_simulation_plus with (match_states := ms). 
+- exact symbols_preserved.
+- intros. exploit transf_initial_states; eauto. intros [st2 [A B]]. 
+  exists st2; split; auto. split; auto.
+  apply wt_initial_state with (prog := prog); auto. exact wt_prog. 
+- intros. destruct H. eapply transf_final_states; eauto. 
+- intros. destruct H0. 
+  exploit transf_step_correct; eauto. intros [s2' [A B]].
+  exists s2'; split. exact A. split.
+  eapply step_type_preservation; eauto. eexact wt_prog. eexact H. 
+  auto.
 Qed.
 
 End PRESERVATION.
-- 
cgit v1.2.3