From 2a0168fea37b68ad14e2cb60bf215111e49d4870 Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Wed, 23 Jul 2014 08:54:56 +0000
Subject: Merge of "newspilling" branch: - Support single-precision floats as
 first-class values - Introduce chunks Many32, Many64 and types Tany32, Tany64
 to   support saving and restoring registers without knowing   the exact types
 (int/single/float) of their contents, just   their sizes. - Memory model:
 generalize the opaque encoding of pointers to   apply to any value, not just
 pointers, if chunks Many32/Many64   are selected. - More properties of FP
 arithmetic proved.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2537 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 backend/Allocation.v      | 116 ++++---
 backend/Allocproof.v      | 227 +++++++------
 backend/CMtypecheck.ml    |  27 +-
 backend/CSE.v             |   3 +-
 backend/CSEproof.v        |   1 -
 backend/Cminor.v          |  62 +++-
 backend/Constprop.v       |   1 +
 backend/Constpropproof.v  |   2 +
 backend/IRC.ml            |  53 +++-
 backend/Inliningproof.v   |   1 +
 backend/Lineartyping.v    | 786 +++++++++-------------------------------------
 backend/Locations.v       |  61 ++--
 backend/NeedDomain.v      | 101 +-----
 backend/PrintCminor.ml    |  24 +-
 backend/PrintXTL.ml       |   2 +
 backend/RTLtyping.v       |  88 ++----
 backend/Regalloc.ml       |  60 +++-
 backend/SelectDiv.vp      |  12 +
 backend/SelectDivproof.v  |  16 +
 backend/SelectLong.vp     |   4 +
 backend/SelectLongproof.v |  28 ++
 backend/Selection.v       |  15 +
 backend/Selectionproof.v  |  15 +
 backend/Stacking.v        |   8 +-
 backend/Stackingproof.v   | 111 ++++---
 backend/ValueAnalysis.v   |   4 +-
 backend/ValueDomain.v     | 317 ++++++++++++++-----
 27 files changed, 1011 insertions(+), 1134 deletions(-)

(limited to 'backend')

diff --git a/backend/Allocation.v b/backend/Allocation.v
index f4fcd6e..919843b 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -609,6 +609,20 @@ Definition subst_loc (l1 l2: loc) (e: eqs) : option eqs :=
      (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e))
      (Some e).
 
+(** [loc_type_compat env l e] checks that for all equations [r = l] in [e],
+  the type [env r] of [r] is compatible with the type of [l]. *)
+
+Definition sel_type (k: equation_kind) (ty: typ) : typ :=
+  match k with
+  | Full => ty
+  | Low | High => Tint
+  end.
+
+Definition loc_type_compat (env: regenv) (l: loc) (e: eqs) : bool :=
+  EqSet2.for_all_between
+    (fun q => subtype (sel_type (ekind q) (env (ereg q))) (Loc.type l))
+    (select_loc_l l) (select_loc_h l) (eqs2 e).
+
 (** [add_equations [r1...rN] [m1...mN] e] adds to [e] the [N] equations
     [ri = R mi [Full]].  Return [None] if the two lists have different lengths.
 *)
@@ -759,18 +773,25 @@ Definition destroyed_by_move (src dst: loc) :=
   | _, _ => destroyed_by_op Omove
   end.
 
+Definition well_typed_move (env: regenv) (dst: loc) (e: eqs) : bool :=
+  match dst with
+  | R r => true
+  | S sl ofs ty => loc_type_compat env dst e
+  end.
+
 (** Simulate the effect of a sequence of moves [mv] on a set of
   equations [e].  The set [e] is the equations that must hold
   after the sequence of moves.  Return the set of equations that
   must hold before the sequence of moves.  Return [None] if the
   set of equations [e] cannot hold after the sequence of moves. *)
 
-Fixpoint track_moves (mv: moves) (e: eqs) : option eqs :=
+Fixpoint track_moves (env: regenv) (mv: moves) (e: eqs) : option eqs :=
   match mv with
   | nil => Some e
   | (src, dst) :: mv =>
-      do e1 <- track_moves mv e;
+      do e1 <- track_moves env mv e;
       assertion (can_undef_except dst (destroyed_by_move src dst)) e1;
+      assertion (well_typed_move env dst e1);
       subst_loc dst src e1
   end.
 
@@ -803,89 +824,90 @@ Definition kind_second_word := if Archi.big_endian then Low else High.
   equations that must hold "before" these instructions, or [None] if
   impossible. *)
 
-Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option eqs :=
+Definition transfer_aux (f: RTL.function) (env: regenv)
+                        (shape: block_shape) (e: eqs) : option eqs :=
   match shape with
   | BSnop mv s =>
-      track_moves mv e
+      track_moves env mv e
   | BSmove src dst mv s =>
-      track_moves mv (subst_reg dst src e)
+      track_moves env mv (subst_reg dst src e)
   | BSmakelong src1 src2 dst mv s =>
       let e1 := subst_reg_kind dst High src1 Full e in
       let e2 := subst_reg_kind dst Low src2 Full e1 in
       assertion (reg_unconstrained dst e2);
-      track_moves mv e2
+      track_moves env mv e2
   | BSlowlong src dst mv s =>
       let e1 := subst_reg_kind dst Full src Low e in
       assertion (reg_unconstrained dst e1);
-      track_moves mv e1
+      track_moves env mv e1
   | BShighlong src dst mv s =>
       let e1 := subst_reg_kind dst Full src High e in
       assertion (reg_unconstrained dst e1);
-      track_moves mv e1
+      track_moves env mv e1
   | BSop op args res mv1 args' res' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args res args' res' (destroyed_by_op op) e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSopdead op args res mv s =>
       assertion (reg_unconstrained res e);
-      track_moves mv e
+      track_moves env mv e
   | BSload chunk addr args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s =>
-      do e1 <- track_moves mv3 e;
+      do e1 <- track_moves env mv3 e;
       let e2 := remove_equation (Eq kind_second_word dst (R dst2')) e1 in
       assertion (loc_unconstrained (R dst2') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr') e2);
       do e3 <- add_equations args args2' e2;
-      do e4 <- track_moves mv2 e3;
+      do e4 <- track_moves env mv2 e3;
       let e5 := remove_equation (Eq kind_first_word dst (R dst1')) e4 in
       assertion (loc_unconstrained (R dst1') e5);
       assertion (can_undef (destroyed_by_load Mint32 addr) e5);
       assertion (reg_unconstrained dst e5);
       do e6 <- add_equations args args1' e5;
-      track_moves mv1 e6
+      track_moves env mv1 e6
   | BSload2_1 addr args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let e2 := remove_equation (Eq kind_first_word dst (R dst')) e1 in
       assertion (reg_loc_unconstrained dst (R dst') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr) e2);
       do e3 <- add_equations args args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let e2 := remove_equation (Eq kind_second_word dst (R dst')) e1 in
       assertion (reg_loc_unconstrained dst (R dst') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr') e2);
       do e3 <- add_equations args args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSloaddead chunk addr args dst mv s =>
       assertion (reg_unconstrained dst e);
-      track_moves mv e
+      track_moves env mv e
   | BSstore chunk addr args src mv args' src' s =>
       assertion (can_undef (destroyed_by_store chunk addr) e);
       do e1 <- add_equations (src :: args) (src' :: args') e;
-      track_moves mv e1
+      track_moves env mv e1
   | BSstore2 addr addr' args src mv1 args1' src1' mv2 args2' src2' s =>
       assertion (can_undef (destroyed_by_store Mint32 addr') e);
       do e1 <- add_equations args args2' 
                   (add_equation (Eq kind_second_word src (R src2')) e);
-      do e2 <- track_moves mv2 e1;
+      do e2 <- track_moves env mv2 e1;
       assertion (can_undef (destroyed_by_store Mint32 addr) e2);
       do e3 <- add_equations args args1' 
                   (add_equation (Eq kind_first_word src (R src1')) e2);
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BScall sg ros args res mv1 ros' mv2 s =>
       let args' := loc_arguments sg in
       let res' := map R (loc_result sg) in
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- remove_equations_res res (sig_res sg) res' e1;
       assertion (forallb (fun l => reg_loc_unconstrained res l e2) res');
       assertion (no_caller_saves e2);
       do e3 <- add_equation_ros ros ros' e2;
       do e4 <- add_equations_args args (sig_args sg) args' e3;
-      track_moves mv1 e4
+      track_moves env mv1 e4
   | BStailcall sg ros args mv1 ros' =>
       let args' := loc_arguments sg in
       assertion (tailcall_is_possible sg);
@@ -893,9 +915,9 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
       assertion (ros_compatible_tailcall ros');
       do e1 <- add_equation_ros ros ros' empty_eqs;
       do e2 <- add_equations_args args (sig_args sg) args' e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSbuiltin ef args res mv1 args' res' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let args' := map R args' in
       let res' := map R res' in
       do e2 <- remove_equations_res res (sig_res (ef_sig ef)) res' e1;
@@ -903,23 +925,23 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
       assertion (forallb (fun l => loc_unconstrained l e2) res');
       assertion (can_undef (destroyed_by_builtin ef) e2);
       do e3 <- add_equations_args args (sig_args (ef_sig ef)) args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSannot txt typ args res mv1 args' s =>
       do e1 <- add_equations_args args (annot_args_typ typ) args' e;
-      track_moves mv1 e1
+      track_moves env mv1 e1
   | BScond cond args mv args' s1 s2 =>
       assertion (can_undef (destroyed_by_cond cond) e);
       do e1 <- add_equations args args' e;
-      track_moves mv e1
+      track_moves env mv e1
   | BSjumptable arg mv arg' tbl =>
       assertion (can_undef destroyed_by_jumptable e);
-      track_moves mv (add_equation (Eq Full arg (R arg')) e)
+      track_moves env mv (add_equation (Eq Full arg (R arg')) e)
   | BSreturn None mv =>
-      track_moves mv empty_eqs
+      track_moves env mv empty_eqs
   | BSreturn (Some arg) mv =>
       let arg' := map R (loc_result (RTL.fn_sig f)) in
       do e1 <- add_equations_res arg (sig_res (RTL.fn_sig f)) arg' empty_eqs;
-      track_moves mv e1
+      track_moves env mv e1
   end.
 
 (** The main transfer function for the dataflow analysis.  Like [transfer_aux],
@@ -927,7 +949,7 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
   equations that must hold "after".  It also handles error propagation
   and reporting. *)
 
-Definition transfer (f: RTL.function) (shapes: PTree.t block_shape)
+Definition transfer (f: RTL.function) (env: regenv) (shapes: PTree.t block_shape)
                     (pc: node) (after: res eqs) : res eqs :=
   match after with
   | Error _ => after
@@ -935,7 +957,7 @@ Definition transfer (f: RTL.function) (shapes: PTree.t block_shape)
       match shapes!pc with
       | None => Error(MSG "At PC " :: POS pc :: MSG ": unmatched block" :: nil)
       | Some shape =>
-          match transfer_aux f shape e with
+          match transfer_aux f env shape e with
           | None => Error(MSG "At PC " :: POS pc :: MSG ": invalid register allocation" :: nil)
           | Some e' => OK e'
           end
@@ -1083,8 +1105,8 @@ Definition successors_block_shape (bsh: block_shape) : list node :=
   | BSreturn optarg mv => nil
   end.
 
-Definition analyze (f: RTL.function) (bsh: PTree.t block_shape) :=
-  DS.fixpoint_allnodes bsh successors_block_shape (transfer f bsh).
+Definition analyze (f: RTL.function) (env: regenv) (bsh: PTree.t block_shape) :=
+  DS.fixpoint_allnodes bsh successors_block_shape (transfer f env bsh).
 
 (** * Validating and translating functions and programs *)
 
@@ -1118,9 +1140,9 @@ Function compat_entry (rparams: list reg) (tys: list typ) (lparams: list loc) (e
   and stack size. *)
 
 Definition check_entrypoints_aux (rtl: RTL.function) (ltl: LTL.function)
-                                 (e1: eqs) : option unit :=
+                                 (env: regenv) (e1: eqs) : option unit :=
   do mv <- pair_entrypoints rtl ltl;
-  do e2 <- track_moves mv e1;
+  do e2 <- track_moves env mv e1;
   assertion (compat_entry (RTL.fn_params rtl)
                           (sig_args (RTL.fn_sig rtl))
                           (loc_parameters (RTL.fn_sig rtl)) e2);
@@ -1133,10 +1155,10 @@ Local Close Scope option_monad_scope.
 Local Open Scope error_monad_scope.
 
 Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
-                             (bsh: PTree.t block_shape)
+                             (env: regenv) (bsh: PTree.t block_shape)
                              (a: PMap.t LEq.t): res unit :=
-  do e1 <- transfer rtl bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
-  match check_entrypoints_aux rtl ltl e1 with
+  do e1 <- transfer rtl env bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
+  match check_entrypoints_aux rtl ltl env e1 with
   | None => Error (msg "invalid register allocation at entry point")
   | Some _ => OK tt
   end.
@@ -1145,11 +1167,11 @@ Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
   a source RTL function and an LTL function generated by the external
   register allocator. *)
 
-Definition check_function (rtl: RTL.function) (ltl: LTL.function) : res unit :=
+Definition check_function (rtl: RTL.function) (ltl: LTL.function) (env: regenv): res unit :=
   let bsh := pair_codes rtl ltl in
-  match analyze rtl bsh with
+  match analyze rtl env bsh with
   | None => Error (msg "allocation analysis diverges")
-  | Some a => check_entrypoints rtl ltl bsh a
+  | Some a => check_entrypoints rtl ltl env bsh a
   end.
 
 (** [regalloc] is the external register allocator.  It is written in OCaml
@@ -1165,7 +1187,7 @@ Definition transf_function (f: RTL.function) : res LTL.function :=
   | OK env =>
       match regalloc f with
       | Error m => Error m
-      | OK tf => do x <- check_function f tf; OK tf
+      | OK tf => do x <- check_function f tf env; OK tf
       end
   end.
 
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index 9303878..588a674 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -451,7 +451,7 @@ Lemma add_equations_args_lessdef:
   forall rs ls rl tyl ll e e',
   add_equations_args rl tyl ll e = Some e' ->
   satisf rs ls e' ->
-  Val.has_type_list (rs##rl) (normalize_list tyl) ->
+  Val.has_type_list (rs##rl) tyl ->
   Val.lessdef_list (rs##rl) (decode_longs tyl (map ls ll)).
 Proof.
   intros until e'. functional induction (add_equations_args rl tyl ll e); simpl; intros.
@@ -969,7 +969,6 @@ Proof.
   split. apply eqs_same; auto. auto. 
 Qed.
 
-(*
 Lemma loc_type_compat_charact:
   forall env l e q,
   loc_type_compat env l e = true ->
@@ -1009,8 +1008,7 @@ Proof.
   destruct (rs#r); exact I.
   eelim Loc.diff_not_eq. eexact A. auto.
 Qed.
-*)
-(*
+
 Remark val_lessdef_normalize:
   forall v v' ty,
   Val.has_type v ty -> Val.lessdef v v' ->
@@ -1018,19 +1016,23 @@ Remark val_lessdef_normalize:
 Proof.
   intros. inv H0. rewrite Val.load_result_same; auto. auto. 
 Qed.
-*)
 
 Lemma subst_loc_satisf:
   forall env src dst rs ls e e',
   subst_loc dst src e = Some e' ->
-  (*well_typed_move env dst e = true ->*)
+  well_typed_move env dst e = true ->
   wt_regset env rs ->
   satisf rs ls e' ->
   satisf rs (Locmap.set dst (ls src) ls) e.
 Proof.
   intros; red; intros.
   exploit in_subst_loc; eauto. intros [[A B] | [A B]].
-  subst dst. rewrite Locmap.gss. apply (H1 _ B).
+  subst dst. rewrite Locmap.gss.
+  destruct q as [k r l]; simpl in *.
+  exploit well_typed_move_charact; eauto.
+  destruct l as [mr | sl ofs ty]; intros.
+  apply (H2 _ B). 
+  apply val_lessdef_normalize; auto. apply (H2 _ B). 
   rewrite Locmap.gso; auto.
 Qed.
 
@@ -1077,18 +1079,22 @@ Proof.
 Qed.
 
 Lemma subst_loc_undef_satisf:
-  forall src dst rs ls ml e e',
+  forall env src dst rs ls ml e e',
   subst_loc dst src e = Some e' ->
-  (*well_typed_move env dst e = true ->*)
+  well_typed_move env dst e = true ->
   can_undef_except dst ml e = true ->
-  (*wt_regset env rs ->*)
+  wt_regset env rs ->
   satisf rs ls e' ->
   satisf rs (Locmap.set dst (ls src) (undef_regs ml ls)) e.
 Proof.
   intros; red; intros.
   exploit in_subst_loc; eauto. intros [[A B] | [A B]].
   subst dst. rewrite Locmap.gss.
-  destruct q as [k r l]; simpl in *. apply (H1 _ B). 
+  destruct q as [k r l]; simpl in *.
+  exploit well_typed_move_charact; eauto.
+  destruct l as [mr | sl ofs ty]; intros.
+  apply (H3 _ B). 
+  apply val_lessdef_normalize; auto. apply (H3 _ B). 
   rewrite Locmap.gso; auto. rewrite undef_regs_outside. eauto. 
   eapply can_undef_except_sound; eauto. apply Loc.diff_sym; auto.
 Qed.
@@ -1327,30 +1333,45 @@ Proof.
   auto. congruence.
 Qed.
 
+Lemma loadv_int64_split:
+  forall m a v,
+  Mem.loadv Mint64 m a = Some v ->
+  exists v1 v2,
+     Mem.loadv Mint32 m a = Some (if Archi.big_endian then v1 else v2)
+  /\ Mem.loadv  Mint32 m (Val.add a (Vint (Int.repr 4))) = Some (if Archi.big_endian then v2 else v1)
+  /\ Val.lessdef (Val.hiword v) v1
+  /\ Val.lessdef (Val.loword v) v2.
+Proof.
+  intros. exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & A & B & C).
+  exists v1, v2. split; auto. split; auto. 
+  inv C; auto. destruct v1, v2; simpl; auto. 
+  rewrite Int64.hi_ofwords, Int64.lo_ofwords; auto. 
+Qed.
+
 (** * Properties of the dataflow analysis *)
 
 Lemma analyze_successors:
-  forall f bsh an pc bs s e,
-  analyze f bsh = Some an ->
+  forall f env bsh an pc bs s e,
+  analyze f env bsh = Some an ->
   bsh!pc = Some bs ->
   In s (successors_block_shape bs) ->
   an!!pc = OK e ->
-  exists e', transfer f bsh s an!!s = OK e' /\ EqSet.Subset e' e.
+  exists e', transfer f env bsh s an!!s = OK e' /\ EqSet.Subset e' e.
 Proof.
   unfold analyze; intros. exploit DS.fixpoint_allnodes_solution; eauto.
-  rewrite H2. unfold DS.L.ge. destruct (transfer f bsh s an#s); intros.
+  rewrite H2. unfold DS.L.ge. destruct (transfer f env bsh s an#s); intros.
   exists e0; auto. 
   contradiction.
 Qed.
 
 Lemma satisf_successors:
-  forall f bsh an pc bs s e rs ls,
-  analyze f bsh = Some an ->
+  forall f env bsh an pc bs s e rs ls,
+  analyze f env bsh = Some an ->
   bsh!pc = Some bs ->
   In s (successors_block_shape bs) ->
   an!!pc = OK e ->
   satisf rs ls e ->
-  exists e', transfer f bsh s an!!s = OK e' /\ satisf rs ls e'.
+  exists e', transfer f env bsh s an!!s = OK e' /\ satisf rs ls e'.
 Proof.
   intros. exploit analyze_successors; eauto. intros [e' [A B]]. 
   exists e'; split; auto. eapply satisf_incr; eauto.
@@ -1360,12 +1381,13 @@ Qed.
 
 Inductive transf_function_spec (f: RTL.function) (tf: LTL.function) : Prop :=
   | transf_function_spec_intro:  
-      forall an mv k e1 e2,
-      analyze f (pair_codes f tf) = Some an ->
+      forall env an mv k e1 e2,
+      wt_function f env ->
+      analyze f env (pair_codes f tf) = Some an ->
       (LTL.fn_code tf)!(LTL.fn_entrypoint tf) = Some(expand_moves mv (Lbranch (RTL.fn_entrypoint f) :: k)) ->
       wf_moves mv ->
-      transfer f (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
-      track_moves mv e1 = Some e2 ->
+      transfer f env (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
+      track_moves env mv e1 = Some e2 ->
       compat_entry (RTL.fn_params f) (sig_args (RTL.fn_sig f)) (loc_parameters (fn_sig tf)) e2 = true ->
       can_undef destroyed_at_function_entry e2 = true ->
       RTL.fn_stacksize f = LTL.fn_stacksize tf ->
@@ -1380,33 +1402,36 @@ Proof.
   unfold transf_function; intros.
   destruct (type_function f) as [env|] eqn:TY; try discriminate.
   destruct (regalloc f); try discriminate.
-  destruct (check_function f f0) as [] eqn:?; inv H.
+  destruct (check_function f f0 env) as [] eqn:?; inv H.
   unfold check_function in Heqr. 
-  destruct (analyze f (pair_codes f tf)) as [an|] eqn:?; try discriminate.
+  destruct (analyze f env (pair_codes f tf)) as [an|] eqn:?; try discriminate.
   monadInv Heqr. 
-  destruct (check_entrypoints_aux f tf x) as [y|] eqn:?; try discriminate.
+  destruct (check_entrypoints_aux f tf env x) as [y|] eqn:?; try discriminate.
   unfold check_entrypoints_aux, pair_entrypoints in Heqo0. MonadInv.
   exploit extract_moves_sound; eauto. intros [A B]. subst b.
   exploit check_succ_sound; eauto. intros [k EQ1]. subst b0.
-  econstructor; eauto. congruence. 
+  econstructor; eauto. eapply type_function_correct; eauto. congruence. 
 Qed.
 
 Lemma invert_code:
-  forall f tf pc i opte e,
+  forall f env tf pc i opte e,
+  wt_function f env ->
   (RTL.fn_code f)!pc = Some i ->
-  transfer f (pair_codes f tf) pc opte = OK e ->
+  transfer f env (pair_codes f tf) pc opte = OK e ->
   exists eafter, exists bsh, exists bb,
   opte = OK eafter /\ 
   (pair_codes f tf)!pc = Some bsh /\
   (LTL.fn_code tf)!pc = Some bb /\
   expand_block_shape bsh i bb /\
-  transfer_aux f bsh eafter = Some e.
+  transfer_aux f env bsh eafter = Some e /\
+  wt_instr f env i.
 Proof.
-  intros. destruct opte as [eafter|]; simpl in H0; try discriminate. exists eafter.
+  intros. destruct opte as [eafter|]; simpl in H1; try discriminate. exists eafter.
   destruct (pair_codes f tf)!pc as [bsh|] eqn:?; try discriminate. exists bsh. 
   exploit matching_instr_block; eauto. intros [bb [A B]].
-  destruct (transfer_aux f bsh eafter) as [e1|] eqn:?; inv H0. 
-  exists bb. tauto. 
+  destruct (transfer_aux f env bsh eafter) as [e1|] eqn:?; inv H1. 
+  exists bb. exploit wt_instr_at; eauto.
+  tauto. 
 Qed.
 
 (** * Semantic preservation *)
@@ -1480,10 +1505,11 @@ Proof.
 Qed.
 
 Lemma exec_moves:
-  forall mv rs s f sp bb m e e' ls,
-  track_moves mv e = Some e' ->
+  forall mv env rs s f sp bb m e e' ls,
+  track_moves env mv e = Some e' ->
   wf_moves mv ->
   satisf rs ls e' ->
+  wt_regset env rs ->
   exists ls',
     star step tge (Block s f sp (expand_moves mv bb) ls m)
                E0 (Block s f sp bb ls' m)
@@ -1495,7 +1521,7 @@ Opaque destroyed_by_op.
 - unfold expand_moves; simpl. inv H. exists ls; split. apply star_refl. auto.
   (* step *)
 - destruct a as [src dst]. unfold expand_moves. simpl. 
-  destruct (track_moves mv e) as [e1|] eqn:?; MonadInv.
+  destruct (track_moves env mv e) as [e1|] eqn:?; MonadInv.
   assert (wf_moves mv). red; intros. apply H0; auto with coqlib. 
   destruct src as [rsrc | ssrc]; destruct dst as [rdst | sdst].
   (* reg-reg *)
@@ -1521,13 +1547,17 @@ Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signa
       sg.(sig_res) = Some Tint ->
       match_stackframes nil nil sg
   | match_stackframes_cons:
-      forall res f sp pc rs s tf bb ls ts sg an e
+      forall res f sp pc rs s tf bb ls ts sg an e env
         (STACKS: match_stackframes s ts (fn_sig tf))
         (FUN: transf_function f = OK tf)
-        (ANL: analyze f (pair_codes f tf) = Some an)
-        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
+        (ANL: analyze f env (pair_codes f tf) = Some an)
+        (EQ: transfer f env (pair_codes f tf) pc an!!pc = OK e)
+        (WTF: wt_function f env)
+        (WTRS: wt_regset env rs)
+        (WTRES: env res = proj_sig_res sg)
         (STEPS: forall v ls1 m,
            Val.lessdef_list (encode_long (sig_res sg) v) (map ls1 (map R (loc_result sg))) ->
+           Val.has_type v (env res) ->
            agree_callee_save ls ls1 ->
            exists ls2,
            star LTL.step tge (Block ts tf sp bb ls1 m)
@@ -1540,13 +1570,15 @@ Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signa
 
 Inductive match_states: RTL.state -> LTL.state -> Prop :=
   | match_states_intro:
-      forall s f sp pc rs m ts tf ls m' an e
+      forall s f sp pc rs m ts tf ls m' an e env
         (STACKS: match_stackframes s ts (fn_sig tf))
         (FUN: transf_function f = OK tf)
-        (ANL: analyze f (pair_codes f tf) = Some an)
-        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
+        (ANL: analyze f env (pair_codes f tf) = Some an)
+        (EQ: transfer f env (pair_codes f tf) pc an!!pc = OK e)
         (SAT: satisf rs ls e)
-        (MEM: Mem.extends m m'),
+        (MEM: Mem.extends m m')
+        (WTF: wt_function f env)
+        (WTRS: wt_regset env rs),
       match_states (RTL.State s f sp pc rs m)
                    (LTL.State ts tf sp pc ls m')
   | match_states_call:
@@ -1555,7 +1587,8 @@ Inductive match_states: RTL.state -> LTL.state -> Prop :=
         (FUN: transf_fundef f = OK tf)
         (ARGS: Val.lessdef_list args (decode_longs (sig_args (funsig tf)) (map ls (loc_arguments (funsig tf)))))
         (AG: agree_callee_save (parent_locset ts) ls)
-        (MEM: Mem.extends m m'),
+        (MEM: Mem.extends m m')
+        (WTARGS: Val.has_type_list args (sig_args (funsig tf))),
       match_states (RTL.Callstate s f args m)
                    (LTL.Callstate ts tf ls m')
   | match_states_return:
@@ -1563,7 +1596,8 @@ Inductive match_states: RTL.state -> LTL.state -> Prop :=
         (STACKS: match_stackframes s ts sg)
         (RES: Val.lessdef_list (encode_long (sig_res sg) res) (map ls (map R (loc_result sg))))
         (AG: agree_callee_save (parent_locset ts) ls)
-        (MEM: Mem.extends m m'),
+        (MEM: Mem.extends m m')
+        (WTRES: Val.has_type res (proj_sig_res sg)),
       match_states (RTL.Returnstate s res m)
                    (LTL.Returnstate ts ls m').
 
@@ -1576,13 +1610,14 @@ Proof.
   intros. inv H. 
   constructor. congruence.
   econstructor; eauto.
+  unfold proj_sig_res in *. rewrite H0; auto. 
   intros. unfold loc_result in H; rewrite H0 in H; eauto. 
 Qed.
 
 Ltac UseShape :=
   match goal with
-  | [ CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ = OK _ |- _ ] =>
-      destruct (invert_code _ _ _ _ _ _ CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR); 
+  | [ WT: wt_function _ _, CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ _ = OK _ |- _ ] =>
+      destruct (invert_code _ _ _ _ _ _ _ WT CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR & WTI); 
       inv EBS; unfold transfer_aux in TR; MonadInv
   end.
 
@@ -1626,7 +1661,8 @@ Proof.
   econstructor; eauto. 
 
 (* op move *)
-- simpl in H0. inv H0.  
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0.  
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1637,7 +1673,8 @@ Proof.
   econstructor; eauto.
 
 (* op makelong *)
-- simpl in H0. inv H0. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1649,7 +1686,8 @@ Proof.
   econstructor; eauto.
 
 (* op lowlong *)
-- simpl in H0. inv H0. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1661,7 +1699,8 @@ Proof.
   econstructor; eauto.
 
 (* op highlong *)
-- simpl in H0. inv H0. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0. 
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1673,7 +1712,8 @@ Proof.
   econstructor; eauto.
 
 (* op regular *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_operation_lessdef; eauto. intros [v' [F G]]. 
   exploit (exec_moves mv2); eauto. intros [ls2 [A2 B2]].
@@ -1697,9 +1737,11 @@ Proof.
   eapply reg_unconstrained_satisf; eauto. 
   intros [enext [U V]]. 
   econstructor; eauto.
+  eapply wt_exec_Iop; eauto. 
 
 (* load regular *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
   exploit Mem.loadv_extends; eauto. intros [v' [P Q]]. 
@@ -1715,7 +1757,8 @@ Proof.
   econstructor; eauto.
 
 (* load pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1729,15 +1772,12 @@ Proof.
     eapply reg_unconstrained_satisf. eauto. 
     eapply add_equations_satisf; eauto. assumption.
     rewrite Regmap.gss. apply Val.lessdef_trans with v1'; auto. 
-    subst v. unfold v1', kind_first_word.
-    destruct Archi.big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
   }
   exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
   assert (LD3: Val.lessdef_list rs##args (reglist ls3 args2')).
   { replace (rs##args) with ((rs#dst<-v)##args). 
     eapply add_equations_lessdef; eauto. 
     apply list_map_exten; intros. rewrite Regmap.gso; auto.
-    exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
     eapply addressing_not_long; eauto. 
   }
   exploit eval_addressing_lessdef. eexact LD3.
@@ -1747,9 +1787,7 @@ Proof.
   assert (SAT4: satisf (rs#dst <- v) ls4 e0).
   { eapply loc_unconstrained_satisf. eapply can_undef_satisf; eauto. 
     eapply add_equations_satisf; eauto. assumption.
-    apply Val.lessdef_trans with v2'; auto. 
-    rewrite Regmap.gss. subst v. unfold v2', kind_second_word.
-    destruct Archi.big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
+    rewrite Regmap.gss. apply Val.lessdef_trans with v2'; auto.
   }
   exploit (exec_moves mv3); eauto. intros [ls5 [A5 B5]].
   econstructor; split.
@@ -1769,7 +1807,8 @@ Proof.
   econstructor; eauto.
 
 (* load first word of a pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1781,8 +1820,6 @@ Proof.
   assert (SAT2: satisf (rs#dst <- v) ls2 e0).
   { eapply parallel_assignment_satisf; eauto.
     apply Val.lessdef_trans with v1'; auto. 
-    subst v. unfold v1', kind_first_word.
-    destruct Archi.big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
     eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto.
   }
   exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
@@ -1799,7 +1836,8 @@ Proof.
   econstructor; eauto.
 
 (* load second word of a pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V1 & V2).
   set (v2' := if Archi.big_endian then v2 else v1) in *.
   set (v1' := if Archi.big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1812,8 +1850,6 @@ Proof.
   assert (SAT2: satisf (rs#dst <- v) ls2 e0).
   { eapply parallel_assignment_satisf; eauto.
     apply Val.lessdef_trans with v2'; auto. 
-    subst v. unfold v2', kind_second_word.
-    destruct Archi.big_endian; apply val_hiword_longofwords || apply val_loword_longofwords.
     eapply can_undef_satisf. eauto. eapply add_equations_satisf; eauto.
   }
   exploit (exec_moves mv2); eauto. intros [ls3 [A3 B3]]. 
@@ -1839,6 +1875,7 @@ Proof.
   eapply reg_unconstrained_satisf; eauto. 
   intros [enext [U V]]. 
   econstructor; eauto.
+  eapply wt_exec_Iload; eauto.
 
 (* store *)
 - exploit exec_moves; eauto. intros [ls1 [X Y]].
@@ -1918,19 +1955,21 @@ Proof.
   exploit analyze_successors; eauto. simpl. left; eauto. intros [enext [U V]].
   econstructor; eauto.
   econstructor; eauto.
+  inv WTI. congruence.
   intros. exploit (exec_moves mv2). eauto. eauto. 
   eapply function_return_satisf with (v := v) (ls_before := ls1) (ls_after := ls0); eauto.
   eapply add_equation_ros_satisf; eauto. 
   eapply add_equations_args_satisf; eauto. 
   congruence.
+  apply wt_regset_assign; auto. 
   intros [ls2 [A2 B2]].
   exists ls2; split. 
   eapply star_right. eexact A2. constructor. traceEq.
   apply satisf_incr with eafter; auto. 
   rewrite SIG. eapply add_equations_args_lessdef; eauto.
-  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
   inv WTI. rewrite <- H7. apply wt_regset_list; auto. 
-  simpl. red; auto.
+  simpl. red; auto. 
+  inv WTI. rewrite SIG. rewrite <- H7. apply wt_regset_list; auto. 
 
 (* tailcall *)
 - set (sg := RTL.funsig fd) in *.
@@ -1951,15 +1990,15 @@ Proof.
   eapply match_stackframes_change_sig; eauto. rewrite SIG. rewrite e0. decEq.
   destruct (transf_function_inv _ _ FUN); auto.
   rewrite SIG. rewrite return_regs_arg_values; auto. eapply add_equations_args_lessdef; eauto.
-  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
   inv WTI. rewrite <- H6. apply wt_regset_list; auto. 
   apply return_regs_agree_callee_save.
+  rewrite SIG. inv WTI. rewrite <- H6. apply wt_regset_list; auto. 
 
 (* builtin *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- assert (WTRS': wt_regset env (rs#res <- v)) by (eapply wt_exec_Ibuiltin; eauto).
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit external_call_mem_extends; eauto.
   eapply add_equations_args_lessdef; eauto.
-  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
   inv WTI. rewrite <- H4. apply wt_regset_list; auto. 
   intros [v' [m'' [F [G [J K]]]]].
   assert (E: map ls1 (map R args') = reglist ls1 args').
@@ -1990,7 +2029,6 @@ Proof.
 (* annot *)
 - exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. 
   exploit external_call_mem_extends; eauto. eapply add_equations_args_lessdef; eauto.
-  exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
   inv WTI. simpl in H4. rewrite <- H4. apply wt_regset_list; auto. 
   intros [v' [m'' [F [G [J K]]]]].
   assert (v = Vundef). red in H0; inv H0. auto.
@@ -2009,6 +2047,7 @@ Proof.
   econstructor; eauto.
   change (destroyed_by_builtin (EF_annot txt typ)) with (@nil mreg). 
   simpl. subst v. assumption.
+  apply wt_regset_assign; auto. subst v. constructor. 
 
 (* cond *)
 - exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
@@ -2040,43 +2079,44 @@ Proof.
 
 (* return *)
 - destruct (transf_function_inv _ _ FUN). 
-  exploit Mem.free_parallel_extends; eauto. rewrite H9. intros [m'' [P Q]].
-  destruct or as [r|]; MonadInv.
-  (* with an argument *)
-+ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
+  exploit Mem.free_parallel_extends; eauto. rewrite H10. intros [m'' [P Q]].
+  inv WTI; MonadInv.
++ (* without an argument *)
+  exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
   eapply star_right. eexact A1.
   econstructor. eauto. eauto. traceEq.
-  simpl. econstructor; eauto. rewrite <- H10.
-  replace (map (return_regs (parent_locset ts) ls1) (map R (loc_result (RTL.fn_sig f))))
-     with (map ls1 (map R (loc_result (RTL.fn_sig f)))).
-  eapply add_equations_res_lessdef; eauto.
-  rewrite !list_map_compose. apply list_map_exten; intros.
-  unfold return_regs. apply pred_dec_true. eapply loc_result_caller_save; eauto.
+  simpl. econstructor; eauto.
+  unfold encode_long, loc_result. rewrite <- H11; rewrite H13. simpl; auto.
   apply return_regs_agree_callee_save.
-
-  (* without an argument *)
-+ assert (SG: f.(RTL.fn_sig).(sig_res) = None).
-  { exploit wt_instr_inv; eauto. intros (env & WTI & WTRS). 
-    inv WTI; auto. }
+  constructor.
++ (* with an argument *)
   exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
   eapply star_right. eexact A1.
   econstructor. eauto. eauto. traceEq.
-  simpl. econstructor; eauto.
-  unfold encode_long, loc_result. rewrite <- H10. rewrite SG. simpl; auto.
+  simpl. econstructor; eauto. rewrite <- H11.
+  replace (map (return_regs (parent_locset ts) ls1) (map R (loc_result (RTL.fn_sig f))))
+     with (map ls1 (map R (loc_result (RTL.fn_sig f)))).
+  eapply add_equations_res_lessdef; eauto.
+  rewrite !list_map_compose. apply list_map_exten; intros.
+  unfold return_regs. apply pred_dec_true. eapply loc_result_caller_save; eauto.
   apply return_regs_agree_callee_save.
+  unfold proj_sig_res. rewrite <- H11; rewrite H13. apply WTRS. 
 
 (* internal function *)
 - monadInv FUN. simpl in *.
   destruct (transf_function_inv _ _ EQ). 
-  exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H7; apply Zle_refl. 
+  exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H8; apply Zle_refl. 
   intros [m'' [U V]].
+  assert (WTRS: wt_regset env (init_regs args (fn_params f))).
+  { apply wt_init_regs. inv H0. rewrite wt_params. rewrite H9. auto. }
   exploit (exec_moves mv). eauto. eauto.
     eapply can_undef_satisf; eauto. eapply compat_entry_satisf; eauto. 
-    rewrite call_regs_param_values. rewrite H8. eexact ARGS.
+    rewrite call_regs_param_values. rewrite H9. eexact ARGS.
+    exact WTRS.
   intros [ls1 [A B]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -2108,13 +2148,15 @@ Proof.
   exploit list_in_map_inv; eauto. intros [r [A B]]; subst l'. 
   destruct l; simpl; auto. red; intros; subst r0; elim H0.
   eapply loc_result_caller_save; eauto.
- 
+  simpl. eapply external_call_well_typed; eauto. 
+
 (* return *)
 - inv STACKS.
-  exploit STEPS; eauto. intros [ls2 [A B]].
+  exploit STEPS; eauto. rewrite WTRES0; auto. intros [ls2 [A B]].
   econstructor; split.
   eapply plus_left. constructor. eexact A. traceEq.
   econstructor; eauto.
+  apply wt_regset_assign; auto. rewrite WTRES0; auto.
 Qed.
 
 Lemma initial_states_simulation:
@@ -2135,6 +2177,7 @@ Proof.
   rewrite SIG; rewrite H3; simpl; auto.
   red; auto.
   apply Mem.extends_refl.
+  rewrite SIG, H3. constructor.
 Qed.
 
 Lemma final_states_simulation:
diff --git a/backend/CMtypecheck.ml b/backend/CMtypecheck.ml
index 5e46d76..02c3f21 100644
--- a/backend/CMtypecheck.ml
+++ b/backend/CMtypecheck.ml
@@ -34,12 +34,16 @@ let tint = Base Tint
 let tfloat = Base Tfloat
 let tlong = Base Tlong
 let tsingle = Base Tsingle
+let tany32 = Base Tany32
+let tany64 = Base Tany64
 
 let ty_of_typ = function
   | Tint -> tint
   | Tfloat -> tfloat
   | Tlong -> tlong
-  | Tsingle -> tfloat (* should be tsingle when supported *)
+  | Tsingle -> tsingle
+  | Tany32 -> tany32
+  | Tany64 -> tany64
 
 let ty_of_sig_args tyl = List.map ty_of_typ tyl
 
@@ -88,6 +92,7 @@ let name_of_comparison = function
 let type_constant = function
   | Ointconst _ -> tint
   | Ofloatconst _ -> tfloat
+  | Osingleconst _ -> tsingle
   | Olongconst _ -> tlong
   | Oaddrsymbol _ -> tint
   | Oaddrstack _ -> tint
@@ -101,11 +106,18 @@ let type_unary_operation = function
   | Onotint -> tint, tint
   | Onegf -> tfloat, tfloat
   | Oabsf -> tfloat, tfloat
-  | Osingleoffloat -> tfloat, tfloat
+  | Onegfs -> tsingle, tsingle
+  | Oabsfs -> tsingle, tsingle
+  | Osingleoffloat -> tfloat, tsingle
+  | Ofloatofsingle -> tsingle, tfloat
   | Ointoffloat -> tfloat, tint
   | Ointuoffloat -> tfloat, tint
   | Ofloatofint -> tint, tfloat
   | Ofloatofintu -> tint, tfloat
+  | Ointofsingle -> tsingle, tint
+  | Ointuofsingle -> tsingle, tint
+  | Osingleofint -> tint, tsingle
+  | Osingleofintu -> tint, tsingle
   | Onegl -> tlong, tlong
   | Onotl -> tlong, tlong
   | Ointoflong -> tlong, tint
@@ -115,6 +127,8 @@ let type_unary_operation = function
   | Olonguoffloat -> tfloat, tlong
   | Ofloatoflong -> tlong, tfloat
   | Ofloatoflongu -> tlong, tfloat
+  | Olongofsingle -> tsingle, tlong
+  | Olonguofsingle -> tsingle, tlong
   | Osingleoflong -> tlong, tfloat
   | Osingleoflongu -> tlong, tfloat
 
@@ -136,6 +150,10 @@ let type_binary_operation = function
   | Osubf -> tfloat, tfloat, tfloat
   | Omulf -> tfloat, tfloat, tfloat
   | Odivf -> tfloat, tfloat, tfloat
+  | Oaddfs -> tsingle, tsingle, tsingle
+  | Osubfs -> tsingle, tsingle, tsingle
+  | Omulfs -> tsingle, tsingle, tsingle
+  | Odivfs -> tsingle, tsingle, tsingle
   | Oaddl -> tlong, tlong, tlong
   | Osubl -> tlong, tlong, tlong
   | Omull -> tlong, tlong, tlong
@@ -152,6 +170,7 @@ let type_binary_operation = function
   | Ocmp _ -> tint, tint, tint
   | Ocmpu _ -> tint, tint, tint
   | Ocmpf _ -> tfloat, tfloat, tint
+  | Ocmpfs _ -> tsingle, tsingle, tint
   | Ocmpl _ -> tlong, tlong, tint
   | Ocmplu _ -> tlong, tlong, tint
 
@@ -166,8 +185,10 @@ let type_chunk = function
   | Mint16unsigned -> tint
   | Mint32 -> tint
   | Mint64 -> tlong
-  | Mfloat32 -> tfloat
+  | Mfloat32 -> tsingle
   | Mfloat64 -> tfloat
+  | Many32 -> tany32
+  | Many64 -> tany64
 
 let name_of_chunk = PrintAST.name_of_chunk
 
diff --git a/backend/CSE.v b/backend/CSE.v
index 373acce..fa22987 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -293,7 +293,6 @@ Definition store_normalized_range (chunk: memory_chunk) : aval :=
   | Mint8unsigned => Uns 8
   | Mint16signed => Sgn 16
   | Mint16unsigned => Uns 16
-  | Mfloat32 => Fsingle
   | _ => Vtop
   end.
 
@@ -380,7 +379,7 @@ Variable n: numbering.
 Fixpoint reduce_rec (niter: nat) (op: A) (args: list valnum) : option(A * list reg) :=
   match niter with
   | O => None
-  | S niter' =>
+  | Datatypes.S niter' =>
       match f (fun v => find_valnum_num v n.(num_eqs)) op args with
       | None => None
       | Some(op', args') =>
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 317fb82..af138f8 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -497,7 +497,6 @@ Proof.
 - inv H. rewrite is_uns_zero_ext in H3 by omega. rewrite H3; auto.
 - inv H. rewrite is_sgn_sign_ext in H3 by omega. rewrite H3; auto.
 - inv H. rewrite is_uns_zero_ext in H3 by omega. rewrite H3; auto.
-- inv H. rewrite Float.singleoffloat_of_single by auto. auto.
 Qed.
 
 Lemma add_store_result_hold:
diff --git a/backend/Cminor.v b/backend/Cminor.v
index aaf7510..bf20de2 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -35,7 +35,8 @@ Require Import Switch.
 
 Inductive constant : Type :=
   | Ointconst: int -> constant     (**r integer constant *)
-  | Ofloatconst: float -> constant (**r floating-point constant *)
+  | Ofloatconst: float -> constant (**r double-precision floating-point constant *)
+  | Osingleconst: float32 -> constant (**r single-precision floating-point constant *)
   | Olongconst: int64 -> constant  (**r long integer constant *)
   | Oaddrsymbol: ident -> int -> constant (**r address of the symbol plus the offset *)
   | Oaddrstack: int -> constant.   (**r stack pointer plus the given offset *)
@@ -47,22 +48,31 @@ Inductive unary_operation : Type :=
   | Ocast16signed: unary_operation         (**r 16-bit sign extension *)
   | Onegint: unary_operation               (**r integer opposite *)
   | Onotint: unary_operation               (**r bitwise complement  *)
-  | Onegf: unary_operation                 (**r float opposite *)
-  | Oabsf: unary_operation                 (**r float absolute value *)
+  | Onegf: unary_operation                 (**r float64 opposite *)
+  | Oabsf: unary_operation                 (**r float64 absolute value *)
+  | Onegfs: unary_operation                (**r float32 opposite *)
+  | Oabsfs: unary_operation                (**r float32 absolute value *)
   | Osingleoffloat: unary_operation        (**r float truncation to float32 *)
-  | Ointoffloat: unary_operation           (**r signed integer to float *)
-  | Ointuoffloat: unary_operation          (**r unsigned integer to float *)
-  | Ofloatofint: unary_operation           (**r float to signed integer *)
-  | Ofloatofintu: unary_operation          (**r float to unsigned integer *)
+  | Ofloatofsingle: unary_operation        (**r float extension to float64 *)
+  | Ointoffloat: unary_operation           (**r signed integer to float64 *)
+  | Ointuoffloat: unary_operation          (**r unsigned integer to float64 *)
+  | Ofloatofint: unary_operation           (**r float64 to signed integer *)
+  | Ofloatofintu: unary_operation          (**r float64 to unsigned integer *)
+  | Ointofsingle: unary_operation          (**r signed integer to float32 *)
+  | Ointuofsingle: unary_operation         (**r unsigned integer to float32 *)
+  | Osingleofint: unary_operation          (**r float32 to signed integer *)
+  | Osingleofintu: unary_operation        (**r float32 to unsigned integer *)
   | Onegl: unary_operation                 (**r long integer opposite *)
   | Onotl: unary_operation                 (**r long bitwise complement *)
   | Ointoflong: unary_operation            (**r long to int *)
   | Olongofint: unary_operation            (**r signed int to long *)
   | Olongofintu: unary_operation           (**r unsigned int to long *)
-  | Olongoffloat: unary_operation          (**r float to signed long *)
-  | Olonguoffloat: unary_operation         (**r float to unsigned long *)
-  | Ofloatoflong: unary_operation          (**r signed long to float *)
-  | Ofloatoflongu: unary_operation         (**r unsigned long to float *)
+  | Olongoffloat: unary_operation          (**r float64 to signed long *)
+  | Olonguoffloat: unary_operation         (**r float64 to unsigned long *)
+  | Ofloatoflong: unary_operation          (**r signed long to float64 *)
+  | Ofloatoflongu: unary_operation         (**r unsigned long to float64 *)
+  | Olongofsingle: unary_operation         (**r float32 to signed long *)
+  | Olonguofsingle: unary_operation        (**r float32 to unsigned long *)
   | Osingleoflong: unary_operation         (**r signed long to float32 *)
   | Osingleoflongu: unary_operation.       (**r unsigned long to float32 *)
 
@@ -80,10 +90,14 @@ Inductive binary_operation : Type :=
   | Oshl: binary_operation                 (**r integer left shift *)
   | Oshr: binary_operation                 (**r integer right signed shift *)
   | Oshru: binary_operation                (**r integer right unsigned shift *)
-  | Oaddf: binary_operation                (**r float addition *)
-  | Osubf: binary_operation                (**r float subtraction *)
-  | Omulf: binary_operation                (**r float multiplication *)
-  | Odivf: binary_operation                (**r float division *)
+  | Oaddf: binary_operation                (**r float64 addition *)
+  | Osubf: binary_operation                (**r float64 subtraction *)
+  | Omulf: binary_operation                (**r float64 multiplication *)
+  | Odivf: binary_operation                (**r float64 division *)
+  | Oaddfs: binary_operation               (**r float32 addition *)
+  | Osubfs: binary_operation               (**r float32 subtraction *)
+  | Omulfs: binary_operation               (**r float32 multiplication *)
+  | Odivfs: binary_operation               (**r float32 division *)
   | Oaddl: binary_operation                (**r long addition *)
   | Osubl: binary_operation                (**r long subtraction *)
   | Omull: binary_operation                (**r long multiplication *)
@@ -99,7 +113,8 @@ Inductive binary_operation : Type :=
   | Oshrlu: binary_operation               (**r long right unsigned shift *)
   | Ocmp: comparison -> binary_operation   (**r integer signed comparison *)
   | Ocmpu: comparison -> binary_operation  (**r integer unsigned comparison *)
-  | Ocmpf: comparison -> binary_operation  (**r float comparison *)
+  | Ocmpf: comparison -> binary_operation  (**r float64 comparison *)
+  | Ocmpfs: comparison -> binary_operation (**r float32 comparison *)
   | Ocmpl: comparison -> binary_operation  (**r long signed comparison *)
   | Ocmplu: comparison -> binary_operation. (**r long unsigned comparison *)
 
@@ -240,6 +255,7 @@ Definition eval_constant (sp: val) (cst: constant) : option val :=
   match cst with
   | Ointconst n => Some (Vint n)
   | Ofloatconst n => Some (Vfloat n)
+  | Osingleconst n => Some (Vsingle n)
   | Olongconst n => Some (Vlong n)
   | Oaddrsymbol s ofs =>
       Some(match Genv.find_symbol ge s with
@@ -258,11 +274,18 @@ Definition eval_unop (op: unary_operation) (arg: val) : option val :=
   | Onotint => Some (Val.notint arg)
   | Onegf => Some (Val.negf arg)
   | Oabsf => Some (Val.absf arg)
+  | Onegfs => Some (Val.negfs arg)
+  | Oabsfs => Some (Val.absfs arg)
   | Osingleoffloat => Some (Val.singleoffloat arg)
+  | Ofloatofsingle => Some (Val.floatofsingle arg)
   | Ointoffloat => Val.intoffloat arg
   | Ointuoffloat => Val.intuoffloat arg
   | Ofloatofint => Val.floatofint arg
   | Ofloatofintu => Val.floatofintu arg
+  | Ointofsingle => Val.intofsingle arg
+  | Ointuofsingle => Val.intuofsingle arg
+  | Osingleofint => Val.singleofint arg
+  | Osingleofintu => Val.singleofintu arg
   | Onegl => Some (Val.negl arg)
   | Onotl => Some (Val.notl arg)
   | Ointoflong => Some (Val.loword arg)
@@ -272,6 +295,8 @@ Definition eval_unop (op: unary_operation) (arg: val) : option val :=
   | Olonguoffloat => Val.longuoffloat arg
   | Ofloatoflong => Val.floatoflong arg
   | Ofloatoflongu => Val.floatoflongu arg
+  | Olongofsingle => Val.longofsingle arg
+  | Olonguofsingle => Val.longuofsingle arg
   | Osingleoflong => Val.singleoflong arg
   | Osingleoflongu => Val.singleoflongu arg
   end.
@@ -296,6 +321,10 @@ Definition eval_binop
   | Osubf => Some (Val.subf arg1 arg2)
   | Omulf => Some (Val.mulf arg1 arg2)
   | Odivf => Some (Val.divf arg1 arg2)
+  | Oaddfs => Some (Val.addfs arg1 arg2)
+  | Osubfs => Some (Val.subfs arg1 arg2)
+  | Omulfs => Some (Val.mulfs arg1 arg2)
+  | Odivfs => Some (Val.divfs arg1 arg2)
   | Oaddl => Some (Val.addl arg1 arg2)
   | Osubl => Some (Val.subl arg1 arg2)
   | Omull => Some (Val.mull arg1 arg2)
@@ -312,6 +341,7 @@ Definition eval_binop
   | Ocmp c => Some (Val.cmp c arg1 arg2)
   | Ocmpu c => Some (Val.cmpu (Mem.valid_pointer m) c arg1 arg2)
   | Ocmpf c => Some (Val.cmpf c arg1 arg2)
+  | Ocmpfs c => Some (Val.cmpfs c arg1 arg2)
   | Ocmpl c => Val.cmpl c arg1 arg2
   | Ocmplu c => Val.cmplu c arg1 arg2
   end.
diff --git a/backend/Constprop.v b/backend/Constprop.v
index d2c4d44..bdfc4b1 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -75,6 +75,7 @@ Definition const_for_result (a: aval) : option operation :=
   match a with
   | I n => Some(Ointconst n)
   | F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None
+  | FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None
   | Ptr(Gl symb ofs) => Some(Oaddrsymbol symb ofs)
   | Ptr(Stk ofs) => Some(Oaddrstack ofs)
   | _ => None
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index ecae5dc..b79c721 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -159,6 +159,8 @@ Proof.
   inv H. inv H0. exists (Vint n); auto.
 - (* float *)
   destruct (Compopts.generate_float_constants tt); inv H. inv H0. exists (Vfloat f); auto.
+- (* single *)
+  destruct (Compopts.generate_float_constants tt); inv H. inv H0. exists (Vsingle f); auto.
 - (* pointer *)
   destruct p; try discriminate.
   + (* global *)
diff --git a/backend/IRC.ml b/backend/IRC.ml
index 6cb17e3..dcd8624 100644
--- a/backend/IRC.ml
+++ b/backend/IRC.ml
@@ -35,7 +35,7 @@ type node =
   { ident: int;                         (*r unique identifier *)
     typ: typ;                           (*r its type *)
     var: var;                           (*r the XTL variable it comes from *)
-    regclass: int;                      (*r identifier of register class *)
+    mutable regclass: int;              (*r identifier of register class *)
     mutable accesses: int;              (*r number of defs and uses *)
     mutable spillcost: float;           (*r estimated cost of spilling *)
     mutable adjlist: node list;         (*r all nodes it interferes with *)
@@ -239,15 +239,20 @@ type graph = {
 
 (* Register classes and reserved registers *)
 
-let num_register_classes = 2
+(* We have two main register classes:
+     0 for integer registers
+     1 for floating-point registers
+   plus a third pseudo-class 2 that has no registers and forces
+   stack allocation.  XTL variables are mapped to classes 0 and 1
+   according to their types.  A variable can be forced into class 2
+   by giving it a negative spill cost. *)
 
 let class_of_type = function
-  | Tint -> 0
-  | Tfloat | Tsingle -> 1
+  | Tint | Tany32 -> 0
+  | Tfloat | Tsingle | Tany64 -> 1
   | Tlong -> assert false
 
-let type_of_class c =
-  if c = 0 then Tint else Tfloat
+let no_spill_class = 2
 
 let reserved_registers = ref ([]: mreg list)
 
@@ -267,14 +272,19 @@ let init costs =
   and float_callee_save = remove_reserved float_callee_save_regs in
   {
     caller_save_registers =
-      [| Array.of_list int_caller_save; Array.of_list float_caller_save |];
+      [| Array.of_list int_caller_save;
+         Array.of_list float_caller_save;
+         [||] |];
     callee_save_registers =
-      [| Array.of_list int_callee_save; Array.of_list float_callee_save |];
+      [| Array.of_list int_callee_save;
+         Array.of_list float_callee_save;
+         [||] |];
     num_available_registers =
       [| List.length int_caller_save + List.length int_callee_save;
-         List.length float_caller_save + List.length float_callee_save |];
+         List.length float_caller_save + List.length float_callee_save;
+         0 |];
     start_points =
-      [| 0; 0 |];
+      [| 0; 0; 0 |];
     allocatable_registers =
       int_caller_save @ int_callee_save @ float_caller_save @ float_callee_save;
     stats_of_reg = costs;
@@ -303,10 +313,13 @@ let newNodeOfReg g r ty =
   let st = g.stats_of_reg r in
   g.nextIdent <- g.nextIdent + 1;
   { ident = g.nextIdent; typ = ty; 
-    var = V(r, ty); regclass = class_of_type ty;
+    var = V(r, ty);
+    regclass = if st.cost >= 0 then class_of_type ty else no_spill_class;
     accesses = st.usedefs;
     spillcost = weightedSpillCost st;
-    adjlist = []; degree = 0; movelist = []; extra_adj = []; extra_pref = [];
+    adjlist = [];
+    degree = if st.cost >= 0 then 0 else 1;
+    movelist = []; extra_adj = []; extra_pref = [];
     alias = None;
     color = None;
     nstate = Initial;
@@ -382,11 +395,19 @@ let recordExtraPref n1 n2 =
       mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in
   n1.extra_pref <- m :: n1.extra_pref
 
+let recordExtraPref2 n1 n2 =
+  let m =
+    { src = n1; dst = n2; mstate = FrozenMoves;
+      mnext = DLinkMove.dummy; mprev = DLinkMove.dummy } in
+  n1.extra_pref <- m :: n1.extra_pref;
+  n2.extra_pref <- m :: n2.extra_pref
+
 let addMovePref g n1 n2 =
-  assert (n1.regclass = n2.regclass);
   match n1.color, n2.color with
   | None, None ->
-      recordMove g n1 n2
+      if n1.regclass = n2.regclass
+      then recordMove g n1 n2
+      else recordExtraPref2 n1 n2
   | Some (R mr1), None ->
       if List.mem mr1 g.allocatable_registers then recordMove g n1 n2
   | None, Some (R mr2) ->
@@ -866,7 +887,7 @@ let assign_color g n =
           n.color <- Some loc
       | None ->
           (* Last, pick a Local stack slot *)
-          n.color <- Some (find_slot slot_conflicts (type_of_class n.regclass))
+          n.color <- Some (find_slot slot_conflicts n.typ)
 
 (* Extract the location of a variable *)
 
@@ -884,7 +905,7 @@ let location_of_var g v =
         match ty with
         | Tint -> R dummy_int_reg
         | Tfloat | Tsingle -> R dummy_float_reg
-        | Tlong -> assert false
+        | Tlong | Tany32 | Tany64 -> assert false
 
 (* The exported interface *)
 
diff --git a/backend/Inliningproof.v b/backend/Inliningproof.v
index 8e20442..2564a73 100644
--- a/backend/Inliningproof.v
+++ b/backend/Inliningproof.v
@@ -691,6 +691,7 @@ Proof.
   apply Zone_divide.
   apply Zone_divide.
   apply H2; omega.
+  apply H2; omega.
 Qed.
 
 (** Preservation by external calls *)
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index b08fe87..ccf17e1 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -12,15 +12,12 @@
 
 (** Type-checking Linear code. *)
 
-Require Import FSets.
-Require FSetAVL.
 Require Import Coqlib.
-Require Import Ordered.
-Require Import Maps.
-Require Import Iteration.
 Require Import AST.
 Require Import Integers.
 Require Import Values.
+Require Import Globalenvs.
+Require Import Memory.
 Require Import Events.
 Require Import Op.
 Require Import Machregs.
@@ -29,172 +26,20 @@ Require Import Conventions.
 Require Import LTL.
 Require Import Linear.
 
-(** The typechecker for Linear enforce several properties useful for
-  the proof of the [Stacking] pass:
-- for each instruction, the type of the result register or slot
-  agrees with the type of values produced by the instruction;
-- correctness conditions on the offsets of stack slots
-  accessed through [Lgetstack] and [Lsetstack] Linear instructions.
-*)
-
-(** * Tracking the flow of single-precision floats *)
-
-Module Locset := FSetAVL.Make(OrderedLoc).
-
-(** At each program point, we infer a set of locations that are
-  guaranteed to contain single-precision floats.  [None] means
-  that the program point is not reachable. *)
-
-Definition singlefloats := option Locset.t.
-
-Definition FSbot : singlefloats := None.
-Definition FStop : singlefloats := Some Locset.empty.
-
-Definition setreg (fs: singlefloats) (r: mreg) (t: typ) :=
-  match fs with
-  | None => None
-  | Some s =>
-      Some(if typ_eq t Tsingle then Locset.add (R r) s else Locset.remove (R r) s)
-  end.
-
-Fixpoint setregs (fs: singlefloats) (rl: list mreg) (tl: list typ) :=
-  match rl, tl with
-  | nil, nil => fs
-  | r1 :: rs, t1 :: ts => setregs (setreg fs r1 t1) rs ts
-  | _, _ => fs
-  end.
-
-Definition copyloc (fs: singlefloats) (dst src: loc) :=
-  match fs with
-  | None => None
-  | Some s =>
-      Some(if Locset.mem src s then Locset.add dst s else Locset.remove dst s)
-  end.
-
-Definition destroyed_at_call_regs :=
-  List.fold_right (fun r fs => Locset.add (R r) fs) Locset.empty destroyed_at_call.
-
-Definition callregs (fs: singlefloats) :=
-  match fs with
-  | None => None
-  | Some s => Some (Locset.diff s destroyed_at_call_regs)
-  end.
-
-(** The forward dataflow analysis below records [singlefloats] sets
-  at every label.  Sets at other program points are recomputed when
-  needed. *)
-
-Definition labelmap := PTree.t Locset.t.
-
-(** [update_label lbl fs lm] updates the label map [lm] to reflect the
-  fact that the [singlefloats] set [fs] can flow into label [lbl].
-  It returns the set after the label, an updated label map, and a
-  boolean indicating whether the label map changed. *)
-
-Definition update_label (lbl: label) (fs: singlefloats) (lm: labelmap) : 
-                                              singlefloats * labelmap * bool :=
-  match fs, PTree.get lbl lm with
-  | None, None => (None, lm, false)
-  | None, Some s => (Some s, lm, false)
-  | Some s, None => (Some s, PTree.set lbl s lm, true)
-  | Some s1, Some s2 =>
-      if Locset.subset s2 s1
-      then (Some s2, lm, false)
-      else let s := Locset.inter s1 s2 in (Some s, PTree.set lbl s lm, true)
-  end.
-
-(** [update_labels] is similar, for a list of labels (coming from a 
-    [Ljumptable] instruction). *)
-
-Fixpoint update_labels (lbls: list label) (fs: singlefloats) (lm: labelmap): labelmap * bool :=
-  match lbls with
-  | nil => (lm, false)
-  | lbl1 :: lbls =>
-      let '(_, lm1, changed1) := update_label lbl1 fs lm in
-      let '(lm2, changed2) := update_labels lbls fs lm1 in
-      (lm2, changed1 || changed2)
-  end.
-
-(** One pass of forward analysis over the code [c].  Returns an updated
-  label map and a boolean indicating whether the label map changed. *)
-
-Fixpoint ana_code (lm: labelmap) (ch: bool) (fs: singlefloats) (c: code) : labelmap * bool :=
-  match c with
-  | nil => (lm, ch)
-  | Lgetstack sl ofs ty rd :: c =>
-      ana_code lm ch (copyloc fs (R rd) (S sl ofs ty)) c
-  | Lsetstack rs sl ofs ty :: c =>
-      ana_code lm ch (copyloc fs (S sl ofs ty) (R rs)) c
-  | Lop op args dst :: c =>
-      match is_move_operation op args with
-      | Some src => ana_code lm ch (copyloc fs (R dst) (R src)) c
-      | None     => ana_code lm ch (setreg fs dst (snd (type_of_operation op))) c
-      end
-  | Lload chunk addr args dst :: c =>
-      ana_code lm ch (setreg fs dst (type_of_chunk chunk)) c
-  | Lstore chunk addr args src :: c =>
-      ana_code lm ch fs c
-  | Lcall sg ros :: c =>
-      ana_code lm ch (callregs fs) c
-  | Ltailcall sg ros :: c =>
-      ana_code lm ch None c
-  | Lbuiltin ef args res :: c =>
-      ana_code lm ch (setregs fs res (proj_sig_res' (ef_sig ef))) c
-  | Lannot ef args :: c =>
-      ana_code lm ch fs c
-  | Llabel lbl :: c =>
-      let '(fs1, lm1, ch1) := update_label lbl fs lm in
-      ana_code lm1 (ch || ch1) fs1 c
-  | Lgoto lbl :: c =>
-      let '(_, lm1, ch1) := update_label lbl fs lm in
-      ana_code lm1 (ch || ch1) None c
-  | Lcond cond args lbl :: c =>
-      let '(_, lm1, ch1) := update_label lbl fs lm in
-      ana_code lm1 (ch || ch1) fs c
-  | Ljumptable r lbls :: c =>
-      let '(lm1, ch1) := update_labels lbls fs lm in
-      ana_code lm1 (ch || ch1) None c
-  | Lreturn :: c =>
-      ana_code lm ch None c
-  end.
-
-(** Iterating [ana_code] until the label map is stable. *)
-
-Definition ana_iter (c: code) (lm: labelmap) : labelmap + labelmap :=
-  let '(lm1, ch) := ana_code lm false FStop c in
-  if ch then inr _ lm1 else inl _ lm.
-
-Definition ana_function (f: function): option labelmap :=
-  PrimIter.iterate _ _ (ana_iter f.(fn_code)) (PTree.empty _).
-
-(** * The type-checker *)
-
-(** The typing rules are presented as boolean-valued functions so that we
+(** The rules are presented as boolean-valued functions so that we
   get an executable type-checker for free. *)
 
 Section WT_INSTR.
 
 Variable funct: function.
 
-Variable lm: labelmap.
-
-Definition FSmem (l: loc) (fs: singlefloats) : bool :=
-  match fs with None => true | Some s => Locset.mem l s end.
-
-Definition loc_type (fs: singlefloats) (l: loc) :=
-  let ty := Loc.type l in
-  if typ_eq ty Tfloat && FSmem l fs then Tsingle else ty.
-
 Definition slot_valid (sl: slot) (ofs: Z) (ty: typ): bool :=
   match sl with
   | Local => zle 0 ofs
   | Outgoing => zle 0 ofs
   | Incoming => In_dec Loc.eq (S Incoming ofs ty) (loc_parameters funct.(fn_sig))
   end &&
-  match ty with
-  | Tint | Tfloat | Tsingle => true
-  | Tlong => false
-  end.
+  match ty with Tlong => false | _ => true end.
 
 Definition slot_writable (sl: slot) : bool :=
   match sl with
@@ -210,475 +55,169 @@ Definition loc_valid (l: loc) : bool :=
   | S _ _ _ => false
   end.
 
-Fixpoint wt_code (fs: singlefloats) (c: code) : bool :=
-  match c with
-  | nil => true
-  | Lgetstack sl ofs ty rd :: c =>
-      subtype ty (mreg_type rd) &&
-      slot_valid sl ofs ty &&
-      wt_code (copyloc fs (R rd) (S sl ofs ty)) c
-  | Lsetstack rs sl ofs ty :: c =>
-      subtype (loc_type fs (R rs)) ty &&
-      slot_valid sl ofs ty && slot_writable sl &&
-      wt_code (copyloc fs (S sl ofs ty) (R rs)) c
-  | Lop op args dst :: c =>
+Definition wt_instr (i: instruction) : bool :=
+  match i with
+  | Lgetstack sl ofs ty r =>
+      subtype ty (mreg_type r) && slot_valid sl ofs ty
+  | Lsetstack r sl ofs ty =>
+      slot_valid sl ofs ty && slot_writable sl
+  | Lop op args res =>
       match is_move_operation op args with
-      | Some src =>
-          typ_eq (mreg_type src) (mreg_type dst) &&
-          wt_code (copyloc fs (R dst) (R src)) c
-      | None =>
-          let (ty_args, ty_res) := type_of_operation op in
-          subtype ty_res (mreg_type dst) &&
-          wt_code (setreg fs dst ty_res) c
+      | Some arg =>
+          subtype (mreg_type arg) (mreg_type res)
+      | None => 
+          let (targs, tres) := type_of_operation op in
+          subtype tres (mreg_type res)
       end
-  | Lload chunk addr args dst :: c =>
-      subtype (type_of_chunk chunk) (mreg_type dst) &&
-      wt_code (setreg fs dst (type_of_chunk chunk)) c
-  | Lstore chunk addr args src :: c =>
-      wt_code fs c
-  | Lcall sg ros :: c =>
-      wt_code (callregs fs) c
-  | Ltailcall sg ros :: c =>
-      zeq (size_arguments sg) 0 &&
-      wt_code None c
-  | Lbuiltin ef args res :: c =>
-      let ty_res := proj_sig_res' (ef_sig ef) in
-      subtype_list ty_res (map mreg_type res) &&
-      wt_code (setregs fs res ty_res) c
-  | Lannot ef args :: c =>
-      forallb loc_valid args &&
-      wt_code fs c
-  | Llabel lbl :: c =>
-      wt_code lm!lbl c
-  | Lgoto lbl :: c =>
-      wt_code None c
-  | Lcond cond args lbl :: c =>
-      wt_code fs c
-  | Ljumptable r lbls :: c =>
-      wt_code None c
-  | Lreturn :: c =>
-      wt_code None c
+  | Lload chunk addr args dst =>
+      subtype (type_of_chunk chunk) (mreg_type dst)
+  | Ltailcall sg ros =>
+      zeq (size_arguments sg) 0
+  | Lbuiltin ef args res =>
+      subtype_list (proj_sig_res' (ef_sig ef)) (map mreg_type res) 
+  | Lannot ef args =>
+      forallb loc_valid args
+  | _ =>
+      true
   end.
 
 End WT_INSTR.
 
-Definition wt_funcode (f: function) (lm: labelmap) : bool :=
-  wt_code f lm FStop f.(fn_code).
+Definition wt_code (f: function) (c: code) : bool :=
+  forallb (wt_instr f) c.
 
 Definition wt_function (f: function) : bool :=
-  match ana_function f with
-  | None => false
-  | Some lm => wt_funcode f lm
-  end.
-
-(** * Properties of the static analysis *)
-
-Inductive FSincl: singlefloats -> singlefloats -> Prop :=
-  | FSincl_none: forall fs,
-      FSincl None fs
-  | FSincl_subset: forall s1 s2,
-      Locset.Subset s2 s1 -> FSincl (Some s1) (Some s2).
-
-Lemma update_label_false:
-  forall lbl fs lm fs' lm',
-  update_label lbl fs lm = (fs', lm', false) ->
-  FSincl fs lm!lbl /\ fs' = lm!lbl /\ lm' = lm.
-Proof.
-  unfold update_label; intros. 
-  destruct fs as [s1|]; destruct lm!lbl as [s2|].
-- destruct (Locset.subset s2 s1) eqn:S; inv H.
-  intuition. constructor. apply Locset.subset_2; auto.
-- inv H.
-- inv H. intuition. constructor. 
-- inv H. intuition. constructor.
-Qed.
-
-Lemma update_labels_false:
-  forall fs lbls lm lm',
-  update_labels lbls fs lm = (lm', false) ->
-  (forall lbl, In lbl lbls -> FSincl fs lm!lbl) /\ lm' = lm.
-Proof.
-  induction lbls; simpl; intros. 
-- inv H. tauto. 
-- destruct (update_label a fs lm) as [[fs1 lm1] changed1] eqn:UL.
-  destruct (update_labels lbls fs lm1) as [lm2 changed2] eqn:ULS.
-  inv H. apply orb_false_iff in H2. destruct H2; subst.
-  exploit update_label_false; eauto. intros (A & B & C).
-  exploit IHlbls; eauto. intros (D & E). subst.
-  split. intros. destruct H. congruence. auto.
-  auto.
-Qed.
-
-Lemma ana_code_false:
-  forall lm' c lm ch fs, ana_code lm ch fs c = (lm', false) -> ch = false /\ lm' = lm.
-Proof.
-  induction c; simpl; intros.
-- inv H; auto. 
-- destruct a; try (eapply IHc; eauto; fail).
-  + destruct (is_move_operation o l); eapply IHc; eauto.
-  + destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL.
-    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
-    exploit update_label_false; eauto. intros (C & D & E).
-    auto.
-  + destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL.
-    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
-    exploit update_label_false; eauto. intros (C & D & E). 
-    auto.
-  + destruct (update_label l0 fs lm) as [[fs1 lm1] ch1] eqn:UL.
-    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
-    exploit update_label_false; eauto. intros (C & D & E). 
-    auto.
-  + destruct (update_labels l fs lm) as [lm1 ch1] eqn:UL.
-    exploit IHc; eauto. intros [A B]. apply orb_false_iff in A; destruct A; subst.
-    exploit update_labels_false; eauto. intros (C & D); subst.
-    auto.
-Qed.
-
-Lemma ana_function_inv:
-  forall f lm,
-  ana_function f = Some lm -> ana_code lm false FStop f.(fn_code) = (lm, false).
-Proof.
-  intros. unfold ana_function in H.
-  eapply PrimIter.iterate_prop with
-    (Q := fun lm => ana_code lm false FStop (fn_code f) = (lm, false))
-    (P := fun (lm: labelmap) => True); eauto.
-  intros. unfold ana_iter.
-  destruct (ana_code a false FStop (fn_code f)) as (lm1, ch1) eqn:ANA.
-  destruct ch1. auto. exploit ana_code_false; eauto. intros [A B]. congruence. 
-Qed.
-
-Remark wt_ana_code_cons:
-  forall f lm fs i c,
-  ana_code lm false fs (i :: c) = (lm, false) ->
-  wt_code f lm fs (i :: c) = true ->
-  exists fs', ana_code lm false fs' c = (lm, false) /\ wt_code f lm fs' c = true.
-Proof.
-  simpl; intros; destruct i; InvBooleans; try (econstructor; split; eassumption).
-- destruct (is_move_operation o l).
-  InvBooleans; econstructor; eauto.
-  destruct (type_of_operation o); InvBooleans; econstructor; eauto.
-- destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL. 
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (C & D & E); subst.
-  econstructor; eauto.
-- destruct (update_label l fs lm) as [[fs1 lm1] ch1] eqn:UL. 
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  econstructor; eauto.
-- destruct (update_label l0 fs lm) as [[fs1 lm1] ch1] eqn:UL. 
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  econstructor; eauto.
-- destruct (update_labels l fs lm) as [lm1 ch1] eqn:UL. 
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  econstructor; eauto.
-Qed.
-
-Lemma wt_find_label_rec:
-  forall f lm lbl c' c fs,
-  find_label lbl c = Some c' ->
-  ana_code lm false fs c = (lm, false) ->
-  wt_code f lm fs c = true ->
-  ana_code lm false (PTree.get lbl lm) c' = (lm, false) /\ wt_code f lm (PTree.get lbl lm) c' = true.
-Proof.
-  induction c; intros.
-- discriminate.
-- simpl in H. specialize (is_label_correct lbl a). destruct (is_label lbl a); intros IL.
-  + subst a. inv H. simpl in *. 
-    destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-    exploit ana_code_false; eauto. intros [A B]; subst.
-    exploit update_label_false; eauto. intros (C & D & E). subst.
-    InvBooleans. auto.
-  + exploit wt_ana_code_cons; eauto. intros (fs' & A & B).
-    eapply IHc; eauto. 
-Qed.
+  wt_code f f.(fn_code).
 
-Lemma wt_find_label:
-  forall f lm lbl c,
-  ana_function f = Some lm ->
-  wt_funcode f lm = true ->
-  find_label lbl f.(fn_code) = Some c ->
-  ana_code lm false (PTree.get lbl lm) c = (lm, false) /\ wt_code f lm (PTree.get lbl lm) c = true.
-Proof.
-  intros. eapply wt_find_label_rec; eauto. apply ana_function_inv; auto. 
-Qed.
+(** Typing the run-time state. *)
 
-(** * Soundness of the type system *)
-
-Require Import Values.
-Require Import Globalenvs.
-Require Import Memory.
-
-Module LSF := FSetFacts.Facts(Locset).
-
-(** ** Typing the run-time state *)
-
-Inductive wt_locset: singlefloats -> locset -> Prop :=
-  | wt_locset_intro: forall s ls
-      (TY: forall l, Val.has_type (ls l) (Loc.type l))
-      (SINGLE: forall l, Locset.In l s -> Val.has_type (ls l) Tsingle),
-    wt_locset (Some s) ls.
-
-Lemma wt_mreg:
-  forall fs ls r, wt_locset fs ls -> Val.has_type (ls (R r)) (mreg_type r).
-Proof.
-  intros. inv H. apply (TY (R r)). 
-Qed.
-
-Lemma wt_loc_type:
-  forall fs ls l, wt_locset fs ls -> Val.has_type (ls l) (loc_type fs l).
-Proof.
-  intros. inv H. unfold loc_type, FSmem.
-  destruct (typ_eq (Loc.type l) Tfloat); simpl; auto.
-  destruct (Locset.mem l s) eqn:MEM; auto. 
-  apply SINGLE. apply Locset.mem_2; auto.
-Qed.
-
-Lemma loc_type_subtype:
-  forall fs l, subtype (loc_type fs l) (Loc.type l) = true.
-Proof.
-  unfold loc_type; intros. destruct (typ_eq (Loc.type l) Tfloat); simpl.
-  rewrite e. destruct (FSmem l fs); auto. 
-  destruct (Loc.type l); auto.
-Qed.
-
-Lemma wt_locset_top:
-  forall ls,
-  (forall l, Val.has_type (ls l) (Loc.type l)) -> 
-  wt_locset FStop ls.
-Proof.
-  intros; constructor; intros. 
-  auto.
-  eelim Locset.empty_1; eauto.
-Qed.
-
-Lemma wt_locset_mon:
-  forall fs1 fs2 ls,
-  FSincl fs1 fs2 -> wt_locset fs1 ls -> wt_locset fs2 ls.
-Proof.
-  intros. inv H0; inv H. constructor; intros; auto. 
-Qed.
+Definition wt_locset (ls: locset) : Prop :=
+  forall l, Val.has_type (ls l) (Loc.type l).
 
 Lemma wt_setreg:
-  forall fs ls r v ty,
-  Val.has_type v ty -> subtype ty (mreg_type r) = true -> wt_locset fs ls ->
-  wt_locset (setreg fs r ty) (Locmap.set (R r) v ls).
-Proof.
-  intros. inv H1. constructor; intros. 
-- unfold Locmap.set. destruct (Loc.eq (R r) l).
-  subst l; simpl. eapply Val.has_subtype; eauto.
-  destruct (Loc.diff_dec (R r) l); simpl; auto.
-- unfold Locmap.set. destruct (Loc.eq (R r) l).
-  destruct (typ_eq ty Tsingle). congruence. 
-  subst l. rewrite LSF.remove_iff in H1. intuition.
-  destruct (Loc.diff_dec (R r) l); simpl; auto.
-  apply SINGLE. 
-  destruct (typ_eq ty Tsingle).
-  rewrite LSF.add_iff in H1; intuition.
-  rewrite LSF.remove_iff in H1; intuition.
-Qed.
-
-Lemma wt_setregs:
-  forall vl tyl rl fs rs,
-  Val.has_type_list vl tyl ->
-  subtype_list tyl (map mreg_type rl) = true ->
-  wt_locset fs rs ->
-  wt_locset (setregs fs rl tyl) (Locmap.setlist (map R rl) vl rs).
-Proof.
-  induction vl; simpl; intros.
-- destruct tyl; try contradiction. destruct rl; try discriminate. 
-  simpl. auto.
-- destruct tyl as [ | ty tyl]; try contradiction. destruct H.
-  destruct rl as [ | r rl]; simpl in H0; try discriminate. InvBooleans.
-  simpl. eapply IHvl; eauto. eapply wt_setreg; eauto. 
-Qed.
-
-Lemma undef_regs_type:
-  forall ty l rl ls,
-  Val.has_type (ls l) ty -> Val.has_type (undef_regs rl ls l) ty.
+  forall ls r v,
+  Val.has_type v (mreg_type r) -> wt_locset ls -> wt_locset (Locmap.set (R r) v ls).
 Proof.
-  induction rl; simpl; intros.
-- auto.
-- unfold Locmap.set. destruct (Loc.eq (R a) l). red; auto. 
-  destruct (Loc.diff_dec (R a) l); auto. red; auto.
+  intros; red; intros.
+  unfold Locmap.set. 
+  destruct (Loc.eq (R r) l).
+  subst l; auto.
+  destruct (Loc.diff_dec (R r) l). auto. red. auto.
 Qed.
 
-Lemma wt_copyloc_gen:
-  forall fs ls src dst temps,
-  Val.has_type (ls src) (Loc.type dst) ->
-  wt_locset fs ls ->
-  wt_locset (copyloc fs dst src) (Locmap.set dst (ls src) (undef_regs temps ls)).
+Lemma wt_setstack:
+  forall ls sl ofs ty v,
+  wt_locset ls -> wt_locset (Locmap.set (S sl ofs ty) v ls).
 Proof.
-  intros. inversion H0; subst. constructor; intros. 
-- unfold Locmap.set. destruct (Loc.eq dst l).
-  subst l. auto.
-  destruct (Loc.diff_dec dst l); simpl; auto.
-  apply undef_regs_type; auto.
-- unfold Locmap.set. destruct (Loc.eq dst l).
-  subst l. destruct (Locset.mem src s) eqn:E.
-  apply SINGLE. apply Locset.mem_2; auto. 
-  rewrite LSF.remove_iff in H1. intuition.
-  destruct (Loc.diff_dec dst l); simpl; auto.
-  apply undef_regs_type; auto.
-  apply SINGLE.
-  destruct (Locset.mem src s).
-  rewrite LSF.add_iff in H1. intuition.
-  rewrite LSF.remove_iff in H1. intuition.
-Qed.
-
-Lemma wt_copyloc:
-  forall fs ls src dst temps,
-  subtype (Loc.type src) (Loc.type dst) = true ->
-  wt_locset fs ls ->
-  wt_locset (copyloc fs dst src) (Locmap.set dst (ls src) (undef_regs temps ls)).
-Proof.
-  intros. eapply wt_copyloc_gen; eauto.
-  eapply Val.has_subtype; eauto. inv H0; auto. 
+  intros; red; intros.
+  unfold Locmap.set. 
+  destruct (Loc.eq (S sl ofs ty) l).
+  subst l. simpl. 
+  generalize (Val.load_result_type (chunk_of_type ty) v). 
+  replace (type_of_chunk (chunk_of_type ty)) with ty. auto.
+  destruct ty; reflexivity.
+  destruct (Loc.diff_dec (S sl ofs ty) l). auto. red. auto.
 Qed.
 
 Lemma wt_undef_regs:
-  forall fs ls temps, wt_locset fs ls -> wt_locset fs (undef_regs temps ls).
+  forall rs ls, wt_locset ls -> wt_locset (undef_regs rs ls).
 Proof.
-  intros. inv H; constructor; intros.
-- apply undef_regs_type; auto. 
-- apply undef_regs_type; auto. 
+  induction rs; simpl; intros. auto. apply wt_setreg; auto. red; auto.
 Qed.
 
 Lemma wt_call_regs:
-  forall fs ls, wt_locset fs ls -> wt_locset FStop (call_regs ls).
-Proof.
-  intros. inv H. apply wt_locset_top; intros. 
-  unfold call_regs. 
-  destruct l; auto. 
-  destruct sl; try exact I. 
-  change (Loc.type (S Incoming pos ty)) with (Loc.type (S Outgoing pos ty)); auto.
-Qed.
-
-Remark destroyed_at_call_regs_charact:
-  forall l,
-  Locset.In l destroyed_at_call_regs <->
-  match l with R r => In r destroyed_at_call | S _ _ _ => False end.
+  forall ls, wt_locset ls -> wt_locset (call_regs ls).
 Proof.
-  intros. unfold destroyed_at_call_regs. generalize destroyed_at_call. 
-  induction l0; simpl.
-- rewrite LSF.empty_iff. destruct l; tauto.
-- rewrite LSF.add_iff. rewrite IHl0. destruct l; intuition congruence.
+  intros; red; intros. unfold call_regs. destruct l. auto.
+  destruct sl.
+  red; auto.
+  change (Loc.type (S Incoming pos ty)) with (Loc.type (S Outgoing pos ty)). auto.
+  red; auto.
 Qed.
 
 Lemma wt_return_regs:
-  forall fs caller fs' callee,
-  wt_locset fs caller -> wt_locset fs' callee ->
-  wt_locset (callregs fs) (return_regs caller callee).
+  forall caller callee,
+  wt_locset caller -> wt_locset callee -> wt_locset (return_regs caller callee).
 Proof.
-  intros. inv H; inv H0; constructor; intros.
-- unfold return_regs. destruct l; auto. 
+  intros; red; intros.
+  unfold return_regs. destruct l; auto.
   destruct (in_dec mreg_eq r destroyed_at_call); auto.
-- unfold callregs. 
-  rewrite LSF.diff_iff in H. rewrite destroyed_at_call_regs_charact in H. destruct H.
-  unfold return_regs. destruct l.
-+ destruct (in_dec mreg_eq r destroyed_at_call). tauto. auto. 
-+ auto.
 Qed.
 
 Lemma wt_init:
-  forall s, wt_locset (Some s) (Locmap.init Vundef).
+  wt_locset (Locmap.init Vundef).
 Proof.
-  intros; constructor; intros; simpl; auto.
+  red; intros. unfold Locmap.init. red; auto.
 Qed.
 
-Lemma callregs_setregs_result:
-  forall sg fs,
-  FSincl (setregs fs (loc_result sg) (proj_sig_res' sg)) (callregs fs).
+Lemma wt_setlist:
+  forall vl rl rs,
+  Val.has_type_list vl (map mreg_type rl) ->
+  wt_locset rs ->
+  wt_locset (Locmap.setlist (map R rl) vl rs).
 Proof.
-  assert (X: forall rl tyl, setregs None rl tyl = None).
-  {
-    induction rl; destruct tyl; simpl; auto. 
-  }
-  assert (Y: forall rl s tyl,
-             exists s', setregs (Some s) rl tyl = Some s'
-                     /\ forall l, Locset.In l s -> ~In l (map R rl) -> Locset.In l s').
-  {
-    induction rl; simpl; intros.
-  - exists s; split. destruct tyl; auto. tauto.
-  - destruct tyl. exists s; tauto. 
-    destruct (IHrl (if typ_eq t Tsingle
-                    then Locset.add (R a) s
-                    else Locset.remove (R a) s) tyl) as (s1 & A & B).
-    exists s1; split; auto. intros. apply B.
-    destruct (typ_eq t Tsingle). 
-    rewrite LSF.add_iff. tauto. 
-    rewrite LSF.remove_iff. tauto.
-    tauto.
-  }
-  intros. destruct fs as [s|]; simpl.
-  - destruct (Y (loc_result sg) s (proj_sig_res' sg)) as (s' & A & B).
-    rewrite A. constructor. red; intros.
-    rewrite LSF.diff_iff in H. destruct H. apply B. auto. 
-    red; intros. exploit list_in_map_inv; eauto. intros (r & U & V).
-    subst a. elim H0. rewrite destroyed_at_call_regs_charact. 
-    eapply loc_result_caller_save; eauto.
-  - rewrite X. constructor.
+  induction vl; destruct rl; simpl; intros; try contradiction.
+  auto.
+  destruct H. apply IHvl; auto. apply wt_setreg; auto. 
+Qed.
+
+Lemma wt_find_label:
+  forall f lbl c,
+  wt_function f = true ->
+  find_label lbl f.(fn_code) = Some c ->
+  wt_code f c = true.
+Proof.
+  unfold wt_function; intros until c. generalize (fn_code f). induction c0; simpl; intros.
+  discriminate.
+  InvBooleans. destruct (is_label lbl a). 
+  congruence.
+  auto.
 Qed.
 
+(** Soundness of the type system *)
+
 Definition wt_fundef (fd: fundef) :=
   match fd with
   | Internal f => wt_function f = true
   | External ef => True
   end.
 
-Inductive wt_callstack: list stackframe -> singlefloats -> Prop :=
-  | wt_callstack_nil: forall s,
-      wt_callstack nil (Some s)
-  | wt_callstack_cons: forall f sp rs c s fs lm fs0 fs1
-        (WTSTK: wt_callstack s fs0)
-        (ANF: ana_function f = Some lm)
-        (WTF: wt_funcode f lm = true)
-        (ANC: ana_code lm false (callregs fs1) c = (lm, false))
-        (WTC: wt_code f lm (callregs fs1) c = true)
-        (WTRS: wt_locset fs rs)
-        (INCL: FSincl (callregs fs) (callregs fs1)),
-      wt_callstack (Stackframe f sp rs c :: s) fs.
+Inductive wt_callstack: list stackframe -> Prop :=
+  | wt_callstack_nil:
+      wt_callstack nil
+  | wt_callstack_cons: forall f sp rs c s
+        (WTSTK: wt_callstack s)
+        (WTF: wt_function f = true)
+        (WTC: wt_code f c = true)
+        (WTRS: wt_locset rs),
+      wt_callstack (Stackframe f sp rs c :: s).
 
 Lemma wt_parent_locset:
-  forall s fs, wt_callstack s fs -> wt_locset fs (parent_locset s).
+  forall s, wt_callstack s -> wt_locset (parent_locset s).
 Proof.
   induction 1; simpl.
 - apply wt_init.
 - auto.
 Qed.
 
-Lemma wt_callstack_change_fs:
-  forall s fs, wt_callstack s fs -> wt_callstack s (callregs fs).
-Proof.
-  induction 1.
-- constructor. 
-- econstructor; eauto. 
-  apply wt_locset_mon with fs; auto.
-  + destruct fs; simpl; constructor.
-    red; intros. eapply Locset.diff_1; eauto. 
-  + inv INCL; simpl; constructor.
-    destruct fs; simpl in H0; inv H0. 
-    red; intros. exploit H2; eauto. rewrite ! LSF.diff_iff. tauto.
-Qed.
-
 Inductive wt_state: state -> Prop :=
-  | wt_regular_state: forall s f sp c rs m lm fs fs0
-        (WTSTK: wt_callstack s fs0)
-        (ANF: ana_function f = Some lm)
-        (WTF: wt_funcode f lm = true)
-        (ANC: ana_code lm false fs c = (lm, false))
-        (WTC: wt_code f lm fs c = true)
-        (WTRS: wt_locset fs rs),
+  | wt_regular_state: forall s f sp c rs m
+        (WTSTK: wt_callstack s )
+        (WTF: wt_function f = true)
+        (WTC: wt_code f c = true)
+        (WTRS: wt_locset rs),
       wt_state (State s f sp c rs m)
-  | wt_call_state: forall s fd rs m fs
-        (WTSTK: wt_callstack s fs)
+  | wt_call_state: forall s fd rs m
+        (WTSTK: wt_callstack s)
         (WTFD: wt_fundef fd)
-        (WTRS: wt_locset fs rs),
+        (WTRS: wt_locset rs),
       wt_state (Callstate s fd rs m)
-  | wt_return_state: forall s rs m fs
-        (WTSTK: wt_callstack s fs)
-        (WTRS: wt_locset (callregs fs) rs),
+  | wt_return_state: forall s rs m
+        (WTSTK: wt_callstack s)
+        (WTRS: wt_locset rs),
       wt_state (Returnstate s rs m).
 
-(** ** Preservation of state typing by transitions *)
+(** Preservation of state typing by transitions *)
 
 Section SOUNDNESS.
 
@@ -709,129 +248,89 @@ Proof.
 - (* getstack *)
   simpl in *; InvBooleans. 
   econstructor; eauto.
-  apply wt_copyloc; auto.
+  eapply wt_setreg; eauto. eapply Val.has_subtype; eauto. apply WTRS. 
+  apply wt_undef_regs; auto.
 - (* setstack *)
   simpl in *; InvBooleans. 
   econstructor; eauto.
-  apply wt_copyloc_gen; auto.
-  eapply Val.has_subtype; eauto. eapply wt_loc_type; eauto.
+  apply wt_setstack. apply wt_undef_regs; auto.
 - (* op *)
   simpl in *. destruct (is_move_operation op args) as [src | ] eqn:ISMOVE.
   + (* move *)
     InvBooleans. exploit is_move_operation_correct; eauto. intros [EQ1 EQ2]; subst. 
     simpl in H. inv H.
-    econstructor; eauto. 
-    apply wt_copyloc; auto. simpl. rewrite H0. 
-    destruct (mreg_type res); auto.
+    econstructor; eauto. apply wt_setreg. eapply Val.has_subtype; eauto. apply WTRS. 
+    apply wt_undef_regs; auto.
   + (* other ops *) 
     destruct (type_of_operation op) as [ty_args ty_res] eqn:TYOP. InvBooleans.
     econstructor; eauto.
-    apply wt_setreg; auto. 
-    change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. 
-    eapply type_of_operation_sound; eauto. 
+    apply wt_setreg; auto. eapply Val.has_subtype; eauto. 
+    change ty_res with (snd (ty_args, ty_res)). rewrite <- TYOP. eapply type_of_operation_sound; eauto. 
     red; intros; subst op. simpl in ISMOVE. 
     destruct args; try discriminate. destruct args; discriminate. 
     apply wt_undef_regs; auto.
 - (* load *)
   simpl in *; InvBooleans. 
   econstructor; eauto.
-  apply wt_setreg. 
+  apply wt_setreg. eapply Val.has_subtype; eauto. 
   destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
-  auto. 
   apply wt_undef_regs; auto.
 - (* store *)
   simpl in *; InvBooleans. 
-  econstructor; eauto.
+  econstructor. eauto. eauto. eauto. 
   apply wt_undef_regs; auto.
 - (* call *)
   simpl in *; InvBooleans.
   econstructor; eauto. econstructor; eauto.
-  destruct (callregs fs); constructor. red; auto. 
   eapply wt_find_function; eauto.
 - (* tailcall *)
   simpl in *; InvBooleans.
-  econstructor. apply wt_callstack_change_fs; eauto. 
+  econstructor; eauto. 
   eapply wt_find_function; eauto.
-  eapply wt_return_regs. apply wt_parent_locset; auto. eauto.
+  apply wt_return_regs; auto. apply wt_parent_locset; auto. 
 - (* builtin *)
   simpl in *; InvBooleans.
   econstructor; eauto.
-  apply wt_setregs. 
-  eapply external_call_well_typed'; eauto.
-  auto.
+  apply wt_setlist. 
+  eapply Val.has_subtype_list; eauto. eapply external_call_well_typed'; eauto. 
   apply wt_undef_regs; auto.
 - (* annot *)
   simpl in *; InvBooleans.
   econstructor; eauto. 
 - (* label *)
-  simpl in *.
-  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (A & B & C); subst.
-  econstructor; eauto.
-  eapply wt_locset_mon; eauto.
+  simpl in *. econstructor; eauto.
 - (* goto *)
-  simpl in *.
-  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (A & B & C); subst.
-  exploit wt_find_label; eauto. intros [P Q].
-  econstructor; eauto. 
-  eapply wt_locset_mon; eauto.
+  simpl in *. econstructor; eauto. eapply wt_find_label; eauto. 
 - (* cond branch, taken *)
-  simpl in *.
-  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (A & B & C); subst.
-  exploit wt_find_label; eauto. intros [P Q].
-  econstructor; eauto. 
-  eapply wt_locset_mon. eauto. 
+  simpl in *. econstructor. auto. auto. eapply wt_find_label; eauto.
   apply wt_undef_regs; auto.
 - (* cond branch, not taken *)
-  simpl in *.
-  destruct (update_label lbl fs lm) as [[fs1 lm1] ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_label_false; eauto. intros (A & B & C); subst.
-  econstructor. eauto. eauto. eauto. eauto. eauto.  
+  simpl in *. econstructor. auto. auto. auto. 
   apply wt_undef_regs; auto.
 - (* jumptable *)
-  simpl in *.
-  destruct (update_labels tbl fs lm) as [lm1 ch1] eqn:UL.
-  exploit ana_code_false; eauto. intros [A B]; subst.
-  exploit update_labels_false; eauto. intros (A & B); subst.
-  exploit wt_find_label; eauto. intros [P Q].
-  econstructor; eauto.
-  apply wt_undef_regs. eapply wt_locset_mon; eauto. apply A. eapply list_nth_z_in; eauto. 
+  simpl in *. econstructor. auto. auto. eapply wt_find_label; eauto.
+  apply wt_undef_regs; auto.
 - (* return *)
-  econstructor. eauto. 
-  eapply wt_return_regs. 
-  apply wt_parent_locset; auto.
-  eauto.
+  simpl in *. InvBooleans. 
+  econstructor; eauto.
+  apply wt_return_regs; auto. apply wt_parent_locset; auto.
 - (* internal function *)
-  simpl in WTFD. unfold wt_function in WTFD.
-  destruct (ana_function f) as [lm|] eqn:ANF; try discriminate.
-  econstructor. eauto. eauto. eauto. apply ana_function_inv; auto. exact WTFD. 
-  apply wt_undef_regs. eapply wt_call_regs; eauto. 
+  simpl in WTFD.
+  econstructor. eauto. eauto. eauto.
+  apply wt_undef_regs. apply wt_call_regs. auto. 
 - (* external function *)
-  econstructor. eauto. 
-  eapply wt_locset_mon.
-  eapply callregs_setregs_result. 
-  eapply wt_setregs.
-  eapply external_call_well_typed'; eauto.
-  unfold proj_sig_res', loc_result. destruct (sig_res (ef_sig ef) )as [[] | ]; auto.
-  auto.
+  econstructor. auto. apply wt_setlist; auto.
+  eapply Val.has_subtype_list. apply loc_result_type. eapply external_call_well_typed'; eauto.
 - (* return *)
-  inv WTSTK. econstructor; eauto. 
-  apply wt_locset_mon with (callregs fs); auto. 
+  inv WTSTK. econstructor; eauto.
 Qed.
 
 Theorem wt_initial_state:
   forall S, initial_state prog S -> wt_state S.
 Proof.
-  induction 1. econstructor. 
-  apply wt_callstack_nil with (s := Locset.empty). 
+  induction 1. econstructor. constructor. 
   unfold ge0 in H1. exploit Genv.find_funct_ptr_inversion; eauto.
-  intros [id IN]. eapply wt_prog; eauto. 
+  intros [id IN]. eapply wt_prog; eauto.
   apply wt_init.
 Qed.
 
@@ -850,10 +349,9 @@ Qed.
 Lemma wt_state_setstack:
   forall s f sp sl ofs ty r c rs m,
   wt_state (State s f sp (Lsetstack r sl ofs ty :: c) rs m) ->
-  Val.has_type (rs (R r)) ty /\ slot_valid f sl ofs ty = true /\ slot_writable sl = true.
+  slot_valid f sl ofs ty = true /\ slot_writable sl = true.
 Proof.
-  intros. inv H. simpl in WTC; InvBooleans. intuition. 
-  eapply Val.has_subtype; eauto. eapply wt_loc_type; eauto. 
+  intros. inv H. simpl in WTC; InvBooleans. intuition.
 Qed.
 
 Lemma wt_state_tailcall:
@@ -877,5 +375,5 @@ Lemma wt_callstate_wt_regs:
   wt_state (Callstate s f rs m) ->
   forall r, Val.has_type (rs (R r)) (mreg_type r).
 Proof.
-  intros. inv H. eapply wt_mreg; eauto.
+  intros. inv H. apply WTRS. 
 Qed.
diff --git a/backend/Locations.v b/backend/Locations.v
index 96f1eba..5674b93 100644
--- a/backend/Locations.v
+++ b/backend/Locations.v
@@ -69,7 +69,14 @@ Defined.
 Open Scope Z_scope.
 
 Definition typesize (ty: typ) : Z :=
-  match ty with Tint => 1 | Tlong => 2 | Tfloat => 2 | Tsingle => 1 end.
+  match ty with
+  | Tint => 1
+  | Tlong => 2
+  | Tfloat => 2
+  | Tsingle => 1
+  | Tany32 => 1
+  | Tany64 => 2
+  end.
 
 Lemma typesize_pos:
   forall (ty: typ), typesize ty > 0.
@@ -301,16 +308,29 @@ Module Locmap.
 
   Definition set (l: loc) (v: val) (m: t) : t :=
     fun (p: loc) =>
-      if Loc.eq l p then v
-      else if Loc.diff_dec l p then m p
+      if Loc.eq l p then
+        match l with R r => v | S sl ofs ty => Val.load_result (chunk_of_type ty) v end
+      else if Loc.diff_dec l p then
+        m p
       else Vundef.
 
   Lemma gss: forall l v m,
-    (set l v m) l = v.
+    (set l v m) l = 
+    match l with R r => v | S sl ofs ty => Val.load_result (chunk_of_type ty) v end.
   Proof.
     intros. unfold set. apply dec_eq_true.
   Qed.
 
+  Lemma gss_reg: forall r v m, (set (R r) v m) (R r) = v.
+  Proof.
+    intros. unfold set. rewrite dec_eq_true. auto.
+  Qed.
+
+  Lemma gss_typed: forall l v m, Val.has_type v (Loc.type l) -> (set l v m) l = v.
+  Proof.
+    intros. rewrite gss. destruct l. auto. apply Val.load_result_same; auto. 
+  Qed.
+
   Lemma gso: forall l v m p, Loc.diff l p -> (set l v m) p = m p.
   Proof.
     intros. unfold set. destruct (Loc.eq l p).
@@ -336,10 +356,11 @@ Module Locmap.
   Proof.
     assert (P: forall ll l m, m l = Vundef -> (undef ll m) l = Vundef).
       induction ll; simpl; intros. auto. apply IHll. 
-      unfold set. destruct (Loc.eq a l). auto.
+      unfold set. destruct (Loc.eq a l).
+      destruct a. auto. destruct ty; reflexivity. 
       destruct (Loc.diff_dec a l); auto.
     induction ll; simpl; intros. contradiction. 
-    destruct H. apply P. subst a. apply gss.  
+    destruct H. apply P. subst a. apply gss_typed. exact I. 
     auto.
   Qed.
 
@@ -364,10 +385,12 @@ Module IndexedTyp <: INDEXED_TYPE.
   Definition t := typ.
   Definition index (x: t) :=
     match x with
-    | Tint => 1%positive
-    | Tsingle => 2%positive
-    | Tfloat => 3%positive
-    | Tlong => 4%positive
+    | Tany32 => 1%positive
+    | Tint => 2%positive
+    | Tsingle => 3%positive
+    | Tany64 => 4%positive
+    | Tfloat => 5%positive
+    | Tlong => 6%positive
     end.
   Lemma index_inj: forall x y, index x = index y -> x = y.
   Proof. destruct x; destruct y; simpl; congruence. Qed.
@@ -456,7 +479,7 @@ Module OrderedLoc <: OrderedType.
   Definition diff_low_bound (l: loc) : loc :=
     match l with
     | R mr => l
-    | S sl ofs ty => S sl (ofs - 1) Tfloat
+    | S sl ofs ty => S sl (ofs - 1) Tany64
     end.
 
   Definition diff_high_bound (l: loc) : loc :=
@@ -480,9 +503,9 @@ Module OrderedLoc <: OrderedType.
         destruct H. right.
         destruct H0. right. generalize (RANGE ty'); omega. 
         destruct H0. 
-        assert (ty' = Tint \/ ty' = Tsingle). 
+        assert (ty' = Tint \/ ty' = Tsingle \/ ty' = Tany32). 
         { unfold OrderedTyp.lt in H1. destruct ty'; auto; compute in H1; congruence. }
-        right. destruct H2; subst ty'; simpl typesize; omega. 
+        right. destruct H2 as [E|[E|E]]; subst ty'; simpl typesize; omega. 
       + destruct H. left. apply OrderedSlot.lt_not_eq; auto.
         destruct H. right.
         destruct H0. left; omega.
@@ -502,13 +525,13 @@ Module OrderedLoc <: OrderedType.
       destruct H. contradiction. 
       destruct H.
       right; right; split; auto. left; omega. 
-      left; right; split; auto. destruct ty'; simpl in *.
-      destruct (zlt ofs' (ofs - 1)). left; auto. 
-      right; split. omega. compute. auto. 
+      left; right; split; auto.
+      assert (EITHER: typesize ty' = 1 /\ OrderedTyp.lt ty' Tany64 \/ typesize ty' = 2).
+      { destruct ty'; compute; auto. }
+      destruct (zlt ofs' (ofs - 1)). left; auto.
+      destruct EITHER as [[P Q] | P].
+      right; split; auto. omega.
       left; omega. 
-      left; omega.
-      destruct (zlt ofs' (ofs - 1)). left; auto. 
-      right; split. omega. compute. auto. 
   Qed.
 
 End OrderedLoc.
diff --git a/backend/NeedDomain.v b/backend/NeedDomain.v
index f050c72..73b6831 100644
--- a/backend/NeedDomain.v
+++ b/backend/NeedDomain.v
@@ -33,7 +33,6 @@ Require Import RTL.
 Inductive nval : Type :=
   | Nothing              (**r value is entirely unused *)
   | I (m: int)           (**r only need the bits that are 1 in [m] *)
-  | Fsingle              (**r only need the value after conversion to single float *)
   | All.                 (**r every bit of the value is used *)
 
 Definition eq_nval (x y: nval) : {x=y} + {x<>y}.
@@ -56,12 +55,6 @@ Fixpoint vagree (v w: val) (x: nval) {struct x}: Prop :=
       | Vint p, _ => False
       | _, _ => True                                 
       end
-  | Fsingle =>
-      match v, w with
-      | Vfloat f, Vfloat g => Float.singleoffloat f = Float.singleoffloat g
-      | Vfloat _, _ => False
-      | _, _ => True
-      end
   | All => Val.lessdef v w
   end.
 
@@ -115,9 +108,7 @@ Inductive nge: nval -> nval -> Prop :=
   | nge_all: forall x, nge x Nothing
   | nge_int: forall m1 m2,
       (forall i, 0 <= i < Int.zwordsize -> Int.testbit m2 i = true -> Int.testbit m1 i = true) ->
-      nge (I m1) (I m2)
-  | nge_single:
-      nge Fsingle Fsingle.
+      nge (I m1) (I m2).
 
 Lemma nge_refl: forall x, nge x x.
 Proof.
@@ -145,7 +136,6 @@ Definition nlub (x y: nval) : nval :=
   | Nothing, _ => y
   | _, Nothing => x
   | I m1, I m2 => I (Int.or m1 m2)
-  | Fsingle, Fsingle => Fsingle
   | _, _ => All
   end.
 
@@ -388,14 +378,14 @@ Ltac InvAgree :=
   auto || exact Logic.I ||
   match goal with
   | [ H: False |- _ ] => contradiction
-  | [ H: match ?v with Vundef => _ | Vint _ => _ | Vlong _ => _ | Vfloat _ => _ | Vptr _ _ => _ end |- _ ] => destruct v
+  | [ H: match ?v with Vundef => _ | Vint _ => _ | Vlong _ => _ | Vfloat _ => _ | Vsingle _ => _ | Vptr _ _ => _ end |- _ ] => destruct v
   end).
 
 (** And immediate, or immediate *)
 
 Definition andimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.and m n)
   | All => I n
   end.
@@ -408,13 +398,12 @@ Proof.
   unfold andimm; intros; destruct x; simpl in *; unfold Val.and.
 - auto.
 - InvAgree. apply iagree_and; auto.
-- destruct v; destruct w; tauto.
 - InvAgree. rewrite iagree_and_eq in H. rewrite H; auto.
 Qed.
 
 Definition orimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.and m (Int.not n))
   | _ => I (Int.not n)
   end.
@@ -427,21 +416,16 @@ Proof.
   unfold orimm; intros; destruct x; simpl in *.
 - auto.
 - unfold Val.or; InvAgree. apply iagree_or; auto.
-- destruct v; destruct w; tauto.
 - InvAgree. simpl. apply Val.lessdef_same. f_equal. apply iagree_mone. 
   apply iagree_or. rewrite Int.and_commut. rewrite Int.and_mone. auto.
 Qed.
 
 (** Bitwise operations: and, or, xor, not *)
 
-Definition bitwise (x: nval) :=
-  match x with
-  | Fsingle => Nothing
-  | _ => x
-  end.
+Definition bitwise (x: nval) := x.
 
 Remark bitwise_idem: forall nv, bitwise (bitwise nv) = bitwise nv.
-Proof. destruct nv; auto. Qed.
+Proof. auto. Qed.
 
 Lemma vagree_bitwise_binop:
   forall f,
@@ -456,7 +440,6 @@ Proof.
   unfold bitwise; intros. destruct x; simpl in *.
 - auto. 
 - InvAgree. 
-- destruct v1; auto. destruct v2; auto. 
 - inv H0; auto. inv H1; auto. destruct w1; auto.
 Qed.
 
@@ -489,7 +472,7 @@ Qed.
 
 Definition shlimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.shru m n)
   | All => I (Int.shru Int.mone n)
   end.
@@ -504,14 +487,13 @@ Proof.
   destruct x; simpl in *.
 - auto.
 - InvAgree. apply iagree_shl; auto.
-- destruct v; destruct w; auto.
 - InvAgree. apply Val.lessdef_same. f_equal. apply iagree_mone. apply iagree_shl; auto.
 - destruct v; auto with na.
 Qed.
 
 Definition shruimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.shl m n)
   | All => I (Int.shl Int.mone n)
   end.
@@ -526,14 +508,13 @@ Proof.
   destruct x; simpl in *.
 - auto.
 - InvAgree. apply iagree_shru; auto.
-- destruct v; destruct w; auto.
 - InvAgree. apply Val.lessdef_same. f_equal. apply iagree_mone. apply iagree_shru; auto.
 - destruct v; auto with na.
 Qed.
 
 Definition shrimm (x: nval) (n: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (let m' := Int.shl m n in
               if Int.eq_dec (Int.shru m' n) m
               then m'
@@ -554,14 +535,13 @@ Proof.
   destruct (Int.eq_dec (Int.shru (Int.shl m n) n) m).
   apply iagree_shr_1; auto.
   apply iagree_shr; auto.
-- destruct v; destruct w; auto.
 - InvAgree. apply Val.lessdef_same. f_equal. apply iagree_mone. apply iagree_shr. auto.
 - destruct v; auto with na.
 Qed.
 
 Definition rolm (x: nval) (amount mask: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.ror (Int.and m mask) amount)
   | _ => I (Int.ror mask amount)
   end.
@@ -575,7 +555,6 @@ Proof.
 - auto.
 - unfold Val.rolm; InvAgree. unfold Int.rolm. 
   apply iagree_and. apply iagree_rol. auto. 
-- unfold Val.rolm; destruct v, w; auto.
 - unfold Val.rolm; InvAgree. apply Val.lessdef_same. f_equal. apply iagree_mone. 
   unfold Int.rolm. apply iagree_and. apply iagree_rol. rewrite Int.and_commut. 
   rewrite Int.and_mone. auto.
@@ -583,7 +562,7 @@ Qed.
 
 Definition ror (x: nval) (amount: int) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.rol m amount)
   | All => All
   end.
@@ -598,7 +577,6 @@ Proof.
   destruct x; simpl in *.
 - auto.
 - InvAgree. apply iagree_ror; auto. 
-- destruct v, w; auto.
 - inv H; auto.
 - destruct v; auto with na.
 Qed.
@@ -608,7 +586,7 @@ Qed.
 
 Definition modarith (x: nval) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (complete_mask m)
   | All => All
   end.
@@ -621,7 +599,6 @@ Proof.
   unfold modarith; intros. destruct x; simpl in *. 
 - auto.
 - unfold Val.add; InvAgree. apply eqmod_iagree. apply Int.eqmod_add; apply iagree_eqmod; auto. 
-- unfold Val.add; destruct v1, w1; auto; destruct v2, w2; auto.
 - inv H; auto. inv H0; auto. destruct w1; auto. 
 Qed.
 
@@ -638,7 +615,6 @@ Proof.
   unfold mul, add; intros. destruct x; simpl in *. 
 - auto.
 - unfold Val.mul; InvAgree. apply eqmod_iagree. apply Int.eqmod_mult; apply iagree_eqmod; auto. 
-- unfold Val.mul; destruct v1, w1; auto; destruct v2, w2; auto.
 - inv H; auto. inv H0; auto. destruct w1; auto. 
 Qed.
 
@@ -651,7 +627,6 @@ Proof.
 - auto.
 - unfold Val.neg; InvAgree. 
   apply eqmod_iagree. apply Int.eqmod_neg. apply iagree_eqmod; auto. 
-- destruct v, w; simpl; auto. 
 - inv H; simpl; auto.
 Qed.
 
@@ -659,7 +634,7 @@ Qed.
 
 Definition zero_ext (n: Z) (x: nval) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.zero_ext n m)
   | All => I (Int.zero_ext n Int.mone)
   end.
@@ -677,7 +652,6 @@ Proof.
   red; intros. autorewrite with ints; try omega. 
   destruct (zlt i1 n); auto. apply H; auto.
   autorewrite with ints; try omega. rewrite zlt_true; auto.
-- unfold Val.zero_ext; destruct v; destruct w; auto.
 - unfold Val.zero_ext; InvAgree; auto. apply Val.lessdef_same. f_equal. 
   Int.bit_solve; try omega. destruct (zlt i1 n); auto. apply H; auto. 
   autorewrite with ints; try omega. apply zlt_true; auto.
@@ -685,7 +659,7 @@ Qed.
 
 Definition sign_ext (n: Z) (x: nval) :=
   match x with
-  | Nothing | Fsingle => Nothing
+  | Nothing => Nothing
   | I m => I (Int.or (Int.zero_ext n m) (Int.shl Int.one (Int.repr (n - 1))))
   | All => I (Int.zero_ext n Int.mone)
   end.
@@ -710,7 +684,6 @@ Proof.
   right. rewrite Int.unsigned_repr. rewrite zlt_false by omega. 
   replace (n - 1 - (n - 1)) with 0 by omega. reflexivity. 
   generalize Int.wordsize_max_unsigned; omega.
-- unfold Val.sign_ext; destruct v; destruct w; auto.
 - unfold Val.sign_ext; InvAgree; auto. apply Val.lessdef_same. f_equal. 
   Int.bit_solve; try omega.
   set (j := if zlt i1 n then i1 else n - 1).
@@ -721,31 +694,12 @@ Proof.
   unfold j. destruct (zlt i1 n); omega.
 Qed.
 
-Definition singleoffloat (x: nval) :=
-  match x with
-  | Nothing | I _ => Nothing
-  | Fsingle | All => Fsingle
-  end.
-
-Lemma singleoffloat_sound:
-  forall v w x,
-  vagree v w (singleoffloat x) ->
-  vagree (Val.singleoffloat v) (Val.singleoffloat w) x.
-Proof.
-  unfold singleoffloat; intros. destruct x; simpl in *. 
-- auto.
-- unfold Val.singleoffloat; destruct v, w; auto. 
-- unfold Val.singleoffloat; InvAgree. congruence.
-- unfold Val.singleoffloat; InvAgree; auto. rewrite H; auto. 
-Qed.
-
 (** The needs of a memory store concerning the value being stored. *)
 
 Definition store_argument (chunk: memory_chunk) :=
   match chunk with
   | Mint8signed | Mint8unsigned => I (Int.repr 255)
   | Mint16signed | Mint16unsigned => I (Int.repr 65535)
-  | Mfloat32 => Fsingle
   | _ => All
   end.
 
@@ -781,10 +735,9 @@ Proof.
   change 16 with (Int.size (Int.repr 65535)). apply iagree_eqmod; auto.
 - apply encode_val_inject. rewrite val_inject_id; auto.
 - apply encode_val_inject. rewrite val_inject_id; auto.
-- InvAgree. apply SAME. simpl.
-  rewrite <- (Float.bits_of_singleoffloat f).
-  rewrite <- (Float.bits_of_singleoffloat f0).
-  congruence. 
+- apply encode_val_inject. rewrite val_inject_id; auto.
+- apply encode_val_inject. rewrite val_inject_id; auto.
+- apply encode_val_inject. rewrite val_inject_id; auto.
 - apply encode_val_inject. rewrite val_inject_id; auto.
 Qed.
 
@@ -803,8 +756,6 @@ Proof.
   auto. compute; auto.
 - apply zero_ext_sound with (v := Vint i) (w := Vint i0) (x := All) (n := 16).
   auto. omega.
-- apply singleoffloat_sound with (v := Vfloat f) (w := Vfloat f0) (x := All).
-  auto. 
 Qed.
 
 (** The needs of a comparison *)
@@ -1034,24 +985,6 @@ Proof.
   rewrite Int.bits_zero_ext in H3 by omega. rewrite zlt_false in H3 by auto. discriminate.
 Qed.
 
-Definition singleoffloat_redundant (x: nval) :=
-  match x with
-  | Nothing => true
-  | Fsingle => true
-  | _ => false
-  end.
-
-Lemma singleoffloat_redundant_sound:
-  forall v w x,
-  singleoffloat_redundant x = true ->
-  vagree v w (singleoffloat x) ->
-  vagree (Val.singleoffloat v) w x.
-Proof.
-  unfold singleoffloat; intros. destruct x; try discriminate.
-- auto.
-- simpl in *; InvAgree. simpl. rewrite Float.singleoffloat_idem; auto. 
-Qed.
-
 (** * Neededness for register environments *)
 
 Module NVal <: SEMILATTICE.
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index 4c53c5e..51a45b2 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -31,10 +31,10 @@ let rec precedence = function
   | Evar _ -> (16, NA)
   | Econst _ -> (16, NA)
   | Eunop _ -> (15, RtoL)
-  | Ebinop((Omul|Odiv|Odivu|Omod|Omodu|Omulf|Odivf|Omull|Odivl|Odivlu|Omodl|Omodlu), _, _) -> (13, LtoR)
-  | Ebinop((Oadd|Osub|Oaddf|Osubf|Oaddl|Osubl), _, _) -> (12, LtoR)
+  | Ebinop((Omul|Odiv|Odivu|Omod|Omodu|Omulf|Odivf|Omulfs|Odivfs|Omull|Odivl|Odivlu|Omodl|Omodlu), _, _) -> (13, LtoR)
+  | Ebinop((Oadd|Osub|Oaddf|Osubf|Oaddfs|Osubfs|Oaddl|Osubl), _, _) -> (12, LtoR)
   | Ebinop((Oshl|Oshr|Oshru|Oshll|Oshrl|Oshrlu), _, _) -> (11, LtoR)
-  | Ebinop((Ocmp _|Ocmpu _|Ocmpf _|Ocmpl _|Ocmplu _), _, _) -> (10, LtoR)
+  | Ebinop((Ocmp _|Ocmpu _|Ocmpf _|Ocmpfs _|Ocmpl _|Ocmplu _), _, _) -> (10, LtoR)
   | Ebinop((Oand|Oandl), _, _) -> (8, LtoR)
   | Ebinop((Oxor|Oxorl), _, _) -> (7, LtoR)
   | Ebinop((Oor|Oorl), _, _) -> (6, LtoR)
@@ -55,11 +55,18 @@ let name_of_unop = function
   | Onotint -> "~"
   | Onegf -> "-f"
   | Oabsf -> "absf"
+  | Onegfs -> "-s"
+  | Oabsfs -> "abss"
   | Osingleoffloat -> "float32"
+  | Ofloatofsingle -> "float64"
   | Ointoffloat -> "intoffloat"
   | Ointuoffloat -> "intuoffloat"
   | Ofloatofint -> "floatofint"
   | Ofloatofintu -> "floatofintu"
+  | Ointofsingle -> "intofsingle"
+  | Ointuofsingle -> "intuofsingle"
+  | Osingleofint -> "singleofint"
+  | Osingleofintu -> "singleofintu"
   | Onegl -> "-l"
   | Onotl -> "~l"
   | Ointoflong -> "intoflong"
@@ -69,6 +76,8 @@ let name_of_unop = function
   | Olonguoffloat -> "longuoffloat"
   | Ofloatoflong -> "floatoflong"
   | Ofloatoflongu -> "floatoflongu"
+  | Olongofsingle -> "longofsingle"
+  | Olonguofsingle -> "longuofsingle"
   | Osingleoflong -> "singleoflong"
   | Osingleoflongu -> "singleoflongu"
 
@@ -98,6 +107,10 @@ let name_of_binop = function
   | Osubf -> "-f"
   | Omulf -> "*f"
   | Odivf -> "/f"
+  | Oaddfs -> "+s"
+  | Osubfs -> "-s"
+  | Omulfs -> "*s"
+  | Odivfs -> "/s"
   | Oaddl -> "+l"
   | Osubl -> "-l"
   | Omull -> "*l"
@@ -114,6 +127,7 @@ let name_of_binop = function
   | Ocmp c -> comparison_name c
   | Ocmpu c -> comparison_name c ^ "u"
   | Ocmpf c -> comparison_name c ^ "f"
+  | Ocmpfs c -> comparison_name c ^ "s"
   | Ocmpl c -> comparison_name c ^ "l"
   | Ocmplu c -> comparison_name c ^ "lu"
 
@@ -135,6 +149,8 @@ let rec expr p (prec, e) =
       fprintf p "%ld" (camlint_of_coqint n)
   | Econst(Ofloatconst f) ->
       fprintf p "%F" (camlfloat_of_coqfloat f)
+  | Econst(Osingleconst f) ->
+      fprintf p "%Ff" (camlfloat_of_coqfloat32 f)
   | Econst(Olongconst n) ->
       fprintf p "%LdLL" (camlint64_of_coqint n)
   | Econst(Oaddrsymbol(id, ofs)) ->
@@ -171,6 +187,8 @@ let name_of_type = function
   | Tfloat -> "float"
   | Tlong -> "long"
   | Tsingle -> "single"
+  | Tany32 -> "any32"
+  | Tany64 -> "any64"
 
 let print_sig p sg =
   List.iter
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
index ff45809..08ccf12 100644
--- a/backend/PrintXTL.ml
+++ b/backend/PrintXTL.ml
@@ -34,6 +34,8 @@ let short_name_of_type = function
   | Tfloat -> 'f'
   | Tlong -> 'l'
   | Tsingle -> 's'
+  | Tany32 -> 'w'
+  | Tany64 -> 'd'
 
 let loc pp = function
   | R r -> mreg pp r
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index e8ae7ae..5042c77 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -36,8 +36,6 @@ Require Import Conventions.
   is very simple, consisting of the four types [Tint] (for integers
   and pointers), [Tfloat] (for double-precision floats), [Tlong]
   (for 64-bit integers) and [Tsingle] (for single-precision floats).
-  At the RTL level, we simplify things further by equating [Tsingle]
-  with [Tfloat].
 
   Additionally, we impose that each pseudo-register has the same type
   throughout the function.  This requirement helps with register allocation,
@@ -57,11 +55,6 @@ Require Import Conventions.
   which can work over both integers and floats.
 *)
 
-Definition normalize (ty: typ) : typ :=
-  match ty with Tsingle => Tfloat | _ => ty end.
-
-Definition normalize_list (tyl: list typ) : list typ := map normalize tyl.
-
 Definition regenv := reg -> typ.
 
 Section WT_INSTR.
@@ -86,39 +79,39 @@ Inductive wt_instr : instruction -> Prop :=
       forall op args res s,
       op <> Omove ->
       map env args = fst (type_of_operation op) ->
-      env res = normalize (snd (type_of_operation op)) ->
+      env res = snd (type_of_operation op) ->
       valid_successor s ->
       wt_instr (Iop op args res s)
   | wt_Iload:
       forall chunk addr args dst s,
       map env args = type_of_addressing addr ->
-      env dst = normalize (type_of_chunk chunk) ->
+      env dst = type_of_chunk chunk ->
       valid_successor s ->
       wt_instr (Iload chunk addr args dst s)
   | wt_Istore:
       forall chunk addr args src s,
       map env args = type_of_addressing addr ->
-      env src = type_of_chunk_use chunk ->
+      env src = type_of_chunk chunk ->
       valid_successor s ->
       wt_instr (Istore chunk addr args src s)
   | wt_Icall:
       forall sig ros args res s,
       match ros with inl r => env r = Tint | inr s => True end ->
-      map env args = normalize_list sig.(sig_args) ->
-      env res = normalize (proj_sig_res sig) ->
+      map env args = sig.(sig_args) ->
+      env res = proj_sig_res sig ->
       valid_successor s ->
       wt_instr (Icall sig ros args res s)
   | wt_Itailcall:
       forall sig ros args,
       match ros with inl r => env r = Tint | inr s => True end ->
-      map env args = normalize_list sig.(sig_args) ->
+      map env args = sig.(sig_args) ->
       sig.(sig_res) = funct.(fn_sig).(sig_res) ->
       tailcall_possible sig ->
       wt_instr (Itailcall sig ros args)
   | wt_Ibuiltin:
       forall ef args res s,
-      map env args = normalize_list (ef_sig ef).(sig_args) ->
-      env res = normalize (proj_sig_res (ef_sig ef)) ->
+      map env args = (ef_sig ef).(sig_args) ->
+      env res = proj_sig_res (ef_sig ef) ->
       valid_successor s ->
       wt_instr (Ibuiltin ef args res s)
   | wt_Icond:
@@ -139,7 +132,7 @@ Inductive wt_instr : instruction -> Prop :=
   | wt_Ireturn_some:
       forall arg ty,
       funct.(fn_sig).(sig_res) = Some ty ->
-      env arg = normalize ty ->
+      env arg = ty ->
       wt_instr (Ireturn (Some arg)).
 
 End WT_INSTR.
@@ -152,7 +145,7 @@ End WT_INSTR.
 Record wt_function (f: function) (env: regenv): Prop :=
   mk_wt_function {
     wt_params:
-      map env f.(fn_params) = normalize_list f.(fn_sig).(sig_args);
+      map env f.(fn_params) = f.(fn_sig).(sig_args);
     wt_norepet:
       list_norepet f.(fn_params);
     wt_instrs:
@@ -231,23 +224,23 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
         end
       else
        (let (targs, tres) := type_of_operation op in
-        do e1 <- S.set_list e args targs; S.set e1 res (normalize tres))
+        do e1 <- S.set_list e args targs; S.set e1 res tres)
   | Iload chunk addr args dst s =>
       do x <- check_successor s;
       do e1 <- S.set_list e args (type_of_addressing addr);
-      S.set e1 dst (normalize (type_of_chunk chunk))
+      S.set e1 dst (type_of_chunk chunk)
   | Istore chunk addr args src s =>
       do x <- check_successor s;
       do e1 <- S.set_list e args (type_of_addressing addr);
-      S.set e1 src (type_of_chunk_use chunk)
+      S.set e1 src (type_of_chunk chunk)
   | Icall sig ros args res s =>
       do x <- check_successor s;
       do e1 <- type_ros e ros;
-      do e2 <- S.set_list e1 args (normalize_list sig.(sig_args));
-      S.set e2 res (normalize (proj_sig_res sig))
+      do e2 <- S.set_list e1 args sig.(sig_args);
+      S.set e2 res (proj_sig_res sig)
   | Itailcall sig ros args =>
       do e1 <- type_ros e ros;
-      do e2 <- S.set_list e1 args (normalize_list sig.(sig_args));
+      do e2 <- S.set_list e1 args sig.(sig_args);
       if opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res) then
         if tailcall_is_possible sig
         then OK e2
@@ -256,8 +249,8 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
   | Ibuiltin ef args res s =>
       let sig := ef_sig ef in
       do x <- check_successor s;
-      do e1 <- S.set_list e args (normalize_list sig.(sig_args));
-      S.set e1 res (normalize (proj_sig_res sig))
+      do e1 <- S.set_list e args sig.(sig_args);
+      S.set e1 res (proj_sig_res sig)
  | Icond cond args s1 s2 =>
       do x1 <- check_successor s1;
       do x2 <- check_successor s2;
@@ -271,7 +264,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
   | Ireturn optres =>
       match optres, f.(fn_sig).(sig_res) with
       | None, None => OK e
-      | Some r, Some t => S.set e r (normalize t)
+      | Some r, Some t => S.set e r t
       | _, _ => Error(msg "bad return")
       end
   end.
@@ -297,7 +290,7 @@ Definition check_params_norepet (params: list reg): res unit :=
 
 Definition type_function : res regenv :=
   do e1 <- type_code S.initial;
-  do e2 <- S.set_list e1 f.(fn_params) (normalize_list f.(fn_sig).(sig_args));
+  do e2 <- S.set_list e1 f.(fn_params) f.(fn_sig).(sig_args);
   do te <- S.solve e2;
   do x1 <- check_params_norepet f.(fn_params);
   do x2 <- check_successor f.(fn_entrypoint);
@@ -597,7 +590,7 @@ Proof.
   intros. destruct H. 
   destruct (type_code_complete te S.initial) as (e1 & A & B).
   auto. apply S.satisf_initial. 
-  destruct (S.set_list_complete te f.(fn_params) (normalize_list f.(fn_sig).(sig_args)) e1) as (e2 & C & D); auto.
+  destruct (S.set_list_complete te f.(fn_params) f.(fn_sig).(sig_args) e1) as (e2 & C & D); auto.
   destruct (S.solve_complete te e2) as (te' & E); auto.
   exists te'; unfold type_function.
   rewrite A; simpl. rewrite C; simpl. rewrite E; simpl. 
@@ -637,23 +630,6 @@ Proof.
   apply H.
 Qed.
 
-Lemma normalize_subtype:
-  forall ty, subtype ty (normalize ty) = true.
-Proof.
-  intros. destruct ty; reflexivity.
-Qed.
-
-Lemma wt_regset_assign2:
-  forall env rs v r ty,
-  wt_regset env rs ->
-  Val.has_type v ty ->
-  env r = normalize ty ->
-  wt_regset env (rs#r <- v).
-Proof.
-  intros. eapply wt_regset_assign; eauto.
-  rewrite H1. eapply Val.has_subtype; eauto. apply normalize_subtype.
-Qed.
-
 Lemma wt_regset_list:
   forall env rs,
   wt_regset env rs ->
@@ -684,9 +660,8 @@ Proof.
   intros. inv H. 
   simpl in H0. inv H0. apply wt_regset_assign; auto.
   rewrite H4; auto.
-  eapply wt_regset_assign2; auto.
-  eapply type_of_operation_sound; eauto.
-  auto.
+  eapply wt_regset_assign; auto.
+  rewrite H8. eapply type_of_operation_sound; eauto.
 Qed.
 
 Lemma wt_exec_Iload:
@@ -697,8 +672,7 @@ Lemma wt_exec_Iload:
   wt_regset env (rs#dst <- v).
 Proof.
   intros. destruct a; simpl in H0; try discriminate. inv H.
-  eapply wt_regset_assign2; eauto.
-  eapply Mem.load_type; eauto.
+  eapply wt_regset_assign; eauto. rewrite H8; eapply Mem.load_type; eauto.
 Qed.
 
 Lemma wt_exec_Ibuiltin:
@@ -709,8 +683,8 @@ Lemma wt_exec_Ibuiltin:
   wt_regset env (rs#res <- vres).
 Proof.
   intros. inv H. 
-  eapply wt_regset_assign2; eauto. 
-  eapply external_call_well_typed; eauto.
+  eapply wt_regset_assign; eauto. 
+  rewrite H7; eapply external_call_well_typed; eauto.
 Qed.
 
 Lemma wt_instr_at:
@@ -728,7 +702,7 @@ Inductive wt_stackframes: list stackframe -> signature -> Prop :=
       forall s res f sp pc rs env sg,
       wt_function f env ->
       wt_regset env rs ->
-      env res = normalize (proj_sig_res sg) ->
+      env res = proj_sig_res sg ->
       wt_stackframes s (fn_sig f) ->
       wt_stackframes (Stackframe res f sp pc rs :: s) sg.
 
@@ -743,12 +717,12 @@ Inductive wt_state: state -> Prop :=
       forall s f args m,
       wt_stackframes s (funsig f) ->
       wt_fundef f ->
-      Val.has_type_list args (normalize_list (sig_args (funsig f))) ->
+      Val.has_type_list args (sig_args (funsig f)) ->
       wt_state (Callstate s f args m)
   | wt_state_return:
       forall s v m sg,
       wt_stackframes s sg ->
-      Val.has_type v (normalize (proj_sig_res sg)) ->
+      Val.has_type v (proj_sig_res sg) ->
       wt_state (Returnstate s v m).
 
 Remark wt_stackframes_change_sig:
@@ -814,15 +788,13 @@ Proof.
   econstructor; eauto.
   (* Ireturn *)
   econstructor; eauto. 
-  inv WTI; simpl. auto. unfold proj_sig_res; rewrite H2. rewrite <- H3. auto. 
+  inv WTI; simpl. auto. unfold proj_sig_res; rewrite H2. auto. 
   (* internal function *)
   simpl in *. inv H5.
   econstructor; eauto.
   inv H1. apply wt_init_regs; auto. rewrite wt_params0. auto. 
   (* external function *)
   econstructor; eauto. simpl.  
-  change (Val.has_type res (normalize (proj_sig_res (ef_sig ef)))).
-  eapply Val.has_subtype. apply normalize_subtype. 
   eapply external_call_well_typed; eauto.
   (* return *)
   inv H1. econstructor; eauto.
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
index b736f29..687cbbd 100644
--- a/backend/Regalloc.ml
+++ b/backend/Regalloc.ml
@@ -92,7 +92,14 @@ let rec constrain_regs vl cl =
   | v1 :: vl', None :: cl' -> v1 :: constrain_regs vl' cl'
 
 let move v1 v2 k =
-  if v1 = v2 then k else Xmove(v1, v2) :: k
+  if v1 = v2 then
+    k
+  else if is_stack_reg v1 then begin
+    let t = new_temp (typeof v2) in Xmove(v1, t) :: Xmove(t, v2) :: k
+  end else if is_stack_reg v2 then begin
+    let t = new_temp (typeof v1) in Xmove(v1, t) :: Xmove(t, v2) :: k
+  end else
+    Xmove(v1, v2) :: k
 
 let rec movelist vl1 vl2 k =
   match vl1, vl2 with
@@ -104,7 +111,7 @@ let xparmove srcs dsts k =
   assert (List.length srcs = List.length dsts);
   match srcs, dsts with
   | [], [] -> k
-  | [src], [dst] -> Xmove(src, dst) :: k
+  | [src], [dst] -> move src dst k
   | _, _ -> Xparmove(srcs, dsts, new_temp Tint, new_temp Tfloat) :: k
 
 (* Return the XTL basic block corresponding to the given RTL instruction.
@@ -365,9 +372,15 @@ let spill_costs f =
     | L l -> ()
     | V(r, ty) ->
         let st = get_stats r in
-        let c1 = st.cost + amount in
-        let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in
-        st.cost <- c2;
+        if st.cost < 0 then
+          (* the variable must be spilled, don't change its cost *)
+          assert (amount < max_int)
+        else begin
+          (* saturating addition *)
+          let c1 = st.cost + amount in
+          let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in
+          st.cost <- c2
+        end;           
         st.usedefs <- st.usedefs + uses in
 
   let charge_list amount uses vl =
@@ -376,9 +389,23 @@ let spill_costs f =
   let charge_ros amount ros =
     match ros with Coq_inl v -> charge amount 1 v | Coq_inr id -> () in
 
+  let force_stack_allocation v =
+    match v with
+    | L l -> ()
+    | V(r, ty) ->
+        let st = get_stats r in
+        assert (st.cost < max_int);
+        st.cost <- (-1) in
+
   let charge_instr = function
     | Xmove(src, dst) ->
-        charge 1 1 src; charge 1 1 dst
+        if is_stack_reg src then
+          force_stack_allocation dst
+        else if is_stack_reg dst then
+          force_stack_allocation src
+        else begin
+          charge 1 1 src; charge 1 1 dst
+        end
     | Xreload(src, dst) ->
         charge 1 1 src; charge max_int 1 dst
         (* dest must not be spilled! *)
@@ -491,10 +518,13 @@ let add_interfs_instr g instr live =
             add_interfs_list g (vmreg mr) srcs;
             IRC.add_interf g (vmreg mr) ftmp)
           (destroyed_by_setstack Tsingle)
+  | Xop(Ofloatofsingle, arg1::_, res) when Configuration.arch = "powerpc" ->
+      add_interfs_def g res live;
+      IRC.add_pref g arg1 res
   | Xop(op, args, res) ->
       begin match is_two_address op args with
       | None ->
-          add_interfs_def g res live;
+          add_interfs_def g res live
       | Some(arg1, argl) ->
           (* Treat as "res := arg1; res := op(res, argl)" *)
           add_interfs_def g res live;
@@ -502,7 +532,7 @@ let add_interfs_instr g instr live =
           add_interfs_move g arg1 res
             (vset_addlist (res :: argl) (VSet.remove res live))
       end;
-      add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op);
+      add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op)
   | Xload(chunk, addr, args, dst) ->
       add_interfs_def g dst live;
       add_interfs_destroyed g (VSet.remove dst live) 
@@ -576,10 +606,16 @@ let tospill_instr alloc instr ts =
       then ts
       else VSet.add src (VSet.add dst ts)
   | Xreload(src, dst) ->
-      assert (is_reg alloc dst);
+      if not (is_reg alloc dst) then begin
+        printf "Error: %a was spilled\n" PrintXTL.var dst;
+        assert false
+      end;
       ts
   | Xspill(src, dst) ->
-      assert (is_reg alloc src);
+      if not (is_reg alloc src) then begin
+        printf "Error: %a was spilled\n" PrintXTL.var src;
+        assert false
+      end;
       ts
   | Xparmove(srcs, dsts, itmp, ftmp) ->
       assert (is_reg alloc itmp && is_reg alloc ftmp);
@@ -834,7 +870,9 @@ let make_parmove srcs dsts itmp ftmp k =
   assert (Array.length dst = n);
   let status = Array.make n To_move in
   let temp_for =
-    function Tint -> itmp | (Tfloat|Tsingle) -> ftmp | Tlong -> assert false in
+    function (Tint|Tany32) -> itmp
+           | (Tfloat|Tsingle|Tany64) -> ftmp
+           | Tlong -> assert false in
   let code = ref [] in
   let add_move s d =
     match s, d with
diff --git a/backend/SelectDiv.vp b/backend/SelectDiv.vp
index 938ce5d..a275a85 100644
--- a/backend/SelectDiv.vp
+++ b/backend/SelectDiv.vp
@@ -182,4 +182,16 @@ Nondetfunction divf (e1: expr) (e2: expr) :=
   | _ => Eop Odivf (e1 ::: e2 ::: Enil)
   end.
 
+Definition divfsimm (e: expr) (n: float32) :=
+  match Float32.exact_inverse n with
+  | Some n' => Eop Omulfs (e ::: Eop (Osingleconst n') Enil ::: Enil)
+  | None    => Eop Odivfs (e ::: Eop (Osingleconst n) Enil ::: Enil)
+  end.
+
+Nondetfunction divfs (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Osingleconst n2) Enil => divfsimm e1 n2
+  | _ => Eop Odivfs (e1 ::: e2 ::: Enil)
+  end.
+
 
diff --git a/backend/SelectDivproof.v b/backend/SelectDivproof.v
index 9228cde..d4bd4f5 100644
--- a/backend/SelectDivproof.v
+++ b/backend/SelectDivproof.v
@@ -570,4 +570,20 @@ Proof.
 - TrivialExists.
 Qed.
 
+Theorem eval_divfs:
+  forall le a b x y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (divfs a b) v /\ Val.lessdef (Val.divfs x y) v.
+Proof.
+  intros until y. unfold divfs. destruct (divfs_match b); intros.
+- unfold divfsimm. destruct (Float32.exact_inverse n2) as [n2' | ] eqn:EINV.
+  + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
+    EvalOp. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor. 
+    simpl; eauto. 
+    destruct x; simpl; auto. erewrite Float32.div_mul_inverse; eauto. 
+  + TrivialExists. 
+- TrivialExists.
+Qed.
+
 End CMCONSTRS.
diff --git a/backend/SelectLong.vp b/backend/SelectLong.vp
index 0c1cbb3..ab4ab8c 100644
--- a/backend/SelectLong.vp
+++ b/backend/SelectLong.vp
@@ -138,6 +138,10 @@ Definition floatoflong (arg: expr) :=
   Eexternal hf.(i64_stod) sig_l_f (arg ::: Enil).
 Definition floatoflongu (arg: expr) :=
   Eexternal hf.(i64_utod) sig_l_f (arg ::: Enil).
+Definition longofsingle (arg: expr) := 
+  longoffloat (floatofsingle arg).
+Definition longuofsingle (arg: expr) :=
+  longuoffloat (floatofsingle arg).
 Definition singleoflong (arg: expr) :=
   Eexternal hf.(i64_stof) sig_l_s (arg ::: Enil).
 Definition singleoflongu (arg: expr) :=
diff --git a/backend/SelectLongproof.v b/backend/SelectLongproof.v
index ec0dd2c..c7c7ab2 100644
--- a/backend/SelectLongproof.v
+++ b/backend/SelectLongproof.v
@@ -435,6 +435,34 @@ Proof.
   auto.
 Qed.
 
+Theorem eval_longofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.longofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (longofsingle hf a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold longofsingle.
+  destruct x; simpl in H0; inv H0. destruct (Float32.to_long f) as [n|] eqn:EQ; simpl in H2; inv H2.
+  exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B.
+  apply Float32.to_long_double in EQ. 
+  eapply eval_longoffloat; eauto. simpl.
+  change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto.
+Qed.
+
+Theorem eval_longuofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.longuofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (longuofsingle hf a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold longuofsingle.
+  destruct x; simpl in H0; inv H0. destruct (Float32.to_longu f) as [n|] eqn:EQ; simpl in H2; inv H2.
+  exploit eval_floatofsingle; eauto. intros (v & A & B). simpl in B. inv B.
+  apply Float32.to_longu_double in EQ. 
+  eapply eval_longuoffloat; eauto. simpl.
+  change (Float.of_single f) with (Float32.to_double f); rewrite EQ; auto.
+Qed.
+
 Theorem eval_singleoflong:
   forall le a x y,
   eval_expr ge sp e m le a x ->
diff --git a/backend/Selection.v b/backend/Selection.v
index f62a888..9bd37ef 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -70,6 +70,7 @@ Definition sel_constant (cst: Cminor.constant) : expr :=
   match cst with
   | Cminor.Ointconst n => Eop (Ointconst n) Enil
   | Cminor.Ofloatconst f => Eop (Ofloatconst f) Enil
+  | Cminor.Osingleconst f => Eop (Osingleconst f) Enil
   | Cminor.Olongconst n => longconst n
   | Cminor.Oaddrsymbol id ofs => addrsymbol id ofs
   | Cminor.Oaddrstack ofs => addrstack ofs
@@ -85,11 +86,18 @@ Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
   | Cminor.Onotint => notint arg
   | Cminor.Onegf => negf arg
   | Cminor.Oabsf => absf arg
+  | Cminor.Onegfs => negfs arg
+  | Cminor.Oabsfs => absfs arg
   | Cminor.Osingleoffloat => singleoffloat arg
+  | Cminor.Ofloatofsingle => floatofsingle arg
   | Cminor.Ointoffloat => intoffloat arg
   | Cminor.Ointuoffloat => intuoffloat arg
   | Cminor.Ofloatofint => floatofint arg
   | Cminor.Ofloatofintu => floatofintu arg
+  | Cminor.Ointofsingle => intofsingle arg
+  | Cminor.Ointuofsingle => intuofsingle arg
+  | Cminor.Osingleofint => singleofint arg
+  | Cminor.Osingleofintu => singleofintu arg
   | Cminor.Onegl => negl hf arg
   | Cminor.Onotl => notl arg
   | Cminor.Ointoflong => intoflong arg
@@ -99,6 +107,8 @@ Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
   | Cminor.Olonguoffloat => longuoffloat hf arg
   | Cminor.Ofloatoflong => floatoflong hf arg
   | Cminor.Ofloatoflongu => floatoflongu hf arg
+  | Cminor.Olongofsingle => longofsingle hf arg
+  | Cminor.Olonguofsingle => longuofsingle hf arg
   | Cminor.Osingleoflong => singleoflong hf arg
   | Cminor.Osingleoflongu => singleoflongu hf arg
   end.
@@ -122,6 +132,10 @@ Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
   | Cminor.Osubf => subf arg1 arg2
   | Cminor.Omulf => mulf arg1 arg2
   | Cminor.Odivf => divf arg1 arg2
+  | Cminor.Oaddfs => addfs arg1 arg2
+  | Cminor.Osubfs => subfs arg1 arg2
+  | Cminor.Omulfs => mulfs arg1 arg2
+  | Cminor.Odivfs => divfs arg1 arg2
   | Cminor.Oaddl => addl hf arg1 arg2
   | Cminor.Osubl => subl hf arg1 arg2
   | Cminor.Omull => mull hf arg1 arg2
@@ -138,6 +152,7 @@ Definition sel_binop (op: Cminor.binary_operation) (arg1 arg2: expr) : expr :=
   | Cminor.Ocmp c => comp c arg1 arg2
   | Cminor.Ocmpu c => compu c arg1 arg2
   | Cminor.Ocmpf c => compf c arg1 arg2
+  | Cminor.Ocmpfs c => compfs c arg1 arg2
   | Cminor.Ocmpl c => cmpl c arg1 arg2
   | Cminor.Ocmplu c => cmplu c arg1 arg2
   end.
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 175b025..55977b4 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -191,11 +191,18 @@ Proof.
   apply eval_notint; auto.
   apply eval_negf; auto.
   apply eval_absf; auto.
+  apply eval_negfs; auto.
+  apply eval_absfs; auto.
   apply eval_singleoffloat; auto.
+  apply eval_floatofsingle; auto.
   eapply eval_intoffloat; eauto.
   eapply eval_intuoffloat; eauto.
   eapply eval_floatofint; eauto.
   eapply eval_floatofintu; eauto.
+  eapply eval_intofsingle; eauto.
+  eapply eval_intuofsingle; eauto.
+  eapply eval_singleofint; eauto.
+  eapply eval_singleofintu; eauto.
   eapply eval_negl; eauto.
   eapply eval_notl; eauto.
   eapply eval_intoflong; eauto.
@@ -205,6 +212,8 @@ Proof.
   eapply eval_longuoffloat; eauto.
   eapply eval_floatoflong; eauto.
   eapply eval_floatoflongu; eauto.
+  eapply eval_longofsingle; eauto.
+  eapply eval_longuofsingle; eauto.
   eapply eval_singleoflong; eauto.
   eapply eval_singleoflongu; eauto.
 Qed.
@@ -234,6 +243,10 @@ Proof.
   apply eval_subf; auto.
   apply eval_mulf; auto.
   apply eval_divf; auto.
+  apply eval_addfs; auto.
+  apply eval_subfs; auto.
+  apply eval_mulfs; auto.
+  apply eval_divfs; auto.
   eapply eval_addl; eauto.
   eapply eval_subl; eauto.
   eapply eval_mull; eauto.
@@ -250,6 +263,7 @@ Proof.
   apply eval_comp; auto.
   apply eval_compu; auto.
   apply eval_compf; auto.
+  apply eval_compfs; auto.
   exists v; split; auto. eapply eval_cmpl; eauto.
   exists v; split; auto. eapply eval_cmplu; eauto.
 Qed.
@@ -377,6 +391,7 @@ Proof.
   destruct cst; simpl in *; inv H. 
   exists (Vint i); split; auto. econstructor. constructor. auto. 
   exists (Vfloat f); split; auto. econstructor. constructor. auto.
+  exists (Vsingle f); split; auto. econstructor. constructor. auto. 
   exists (Val.longofwords (Vint (Int64.hiword i)) (Vint (Int64.loword i))); split.
   eapply eval_Eop. constructor. EvalOp. simpl; eauto. constructor. EvalOp. simpl; eauto. constructor. auto.
   simpl. rewrite Int64.ofwords_recompose. auto.
diff --git a/backend/Stacking.v b/backend/Stacking.v
index f7c16d1..4ee43bb 100644
--- a/backend/Stacking.v
+++ b/backend/Stacking.v
@@ -73,12 +73,12 @@ Definition save_callee_save_regs
 Definition save_callee_save_int (fe: frame_env) :=
   save_callee_save_regs 
     fe_num_int_callee_save index_int_callee_save FI_saved_int
-    Tint fe int_callee_save_regs.
+    Tany32 fe int_callee_save_regs.
 
 Definition save_callee_save_float (fe: frame_env) :=
   save_callee_save_regs
     fe_num_float_callee_save index_float_callee_save FI_saved_float 
-    Tfloat fe float_callee_save_regs.
+    Tany64 fe float_callee_save_regs.
 
 Definition save_callee_save (fe: frame_env) (k: Mach.code) :=
   save_callee_save_int fe (save_callee_save_float fe k).
@@ -103,12 +103,12 @@ Definition restore_callee_save_regs
 Definition restore_callee_save_int (fe: frame_env) :=
   restore_callee_save_regs 
     fe_num_int_callee_save index_int_callee_save FI_saved_int
-    Tint fe int_callee_save_regs.
+    Tany32 fe int_callee_save_regs.
 
 Definition restore_callee_save_float (fe: frame_env) :=
   restore_callee_save_regs
     fe_num_float_callee_save index_float_callee_save FI_saved_float 
-    Tfloat fe float_callee_save_regs.
+    Tany64 fe float_callee_save_regs.
 
 Definition restore_callee_save (fe: frame_env) (k: Mach.code) :=
   restore_callee_save_int fe (restore_callee_save_float fe k).
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index b69f1ec..28b155a 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -132,8 +132,8 @@ Definition type_of_index (idx: frame_index) :=
   | FI_retaddr => Tint
   | FI_local x ty => ty
   | FI_arg x ty => ty
-  | FI_saved_int x => Tint
-  | FI_saved_float x => Tfloat
+  | FI_saved_int x => Tany32
+  | FI_saved_float x => Tany64
   end.
 
 (** Non-overlap between the memory areas corresponding to two
@@ -194,8 +194,8 @@ Proof.
   destruct idx1; destruct idx2;
   simpl in V1; simpl in V2; repeat InvIndexValid; simpl in DIFF;
   unfold offset_of_index, type_of_index;
+  change (AST.typesize Tany32) with 4; change (AST.typesize Tany64) with 8;
   change (AST.typesize Tint) with 4;
-  change (AST.typesize Tfloat) with 8;
   omega.
 Qed.
 
@@ -211,8 +211,8 @@ Proof.
   destruct idx;
   simpl in V; repeat InvIndexValid;
   unfold offset_of_index, type_of_index;
+  change (AST.typesize Tany32) with 4; change (AST.typesize Tany64) with 8;
   change (AST.typesize Tint) with 4;
-  change (AST.typesize Tfloat) with 8;
   omega.
 Qed.
 
@@ -254,6 +254,17 @@ Proof.
   auto with align_4.
 Qed.
 
+Lemma offset_of_index_aligned_2:
+  forall idx, index_valid idx ->
+  (align_chunk (chunk_of_type (type_of_index idx)) | offset_of_index fe idx).
+Proof.
+  intros. replace (align_chunk (chunk_of_type (type_of_index idx))) with 4.
+  apply offset_of_index_aligned.
+  assert (type_of_index idx <> Tlong) by
+    (destruct idx; simpl; simpl in H; intuition congruence).
+  destruct (type_of_index idx); auto; congruence.
+Qed.
+
 Lemma fe_stack_data_aligned:
   (8 | fe_stack_data fe).
 Proof.
@@ -271,7 +282,7 @@ Lemma index_local_valid:
 Proof.
   unfold slot_within_bounds, slot_valid, index_valid; intros. 
   InvBooleans. 
-  split. destruct ty; congruence. auto.
+  split. destruct ty; auto || discriminate. auto.
 Qed.
 
 Lemma index_arg_valid:
@@ -281,7 +292,7 @@ Lemma index_arg_valid:
 Proof.
   unfold slot_within_bounds, slot_valid, index_valid; intros. 
   InvBooleans. 
-  split. destruct ty; congruence. auto.
+  split. destruct ty; auto || discriminate. auto. 
 Qed.
 
 Lemma index_saved_int_valid:
@@ -322,8 +333,8 @@ Proof.
   AddPosProps.
   destruct idx; simpl in V; repeat InvIndexValid;
   unfold offset_of_index, type_of_index;
+  change (AST.typesize Tany32) with 4; change (AST.typesize Tany64) with 8;
   change (AST.typesize Tint) with 4;
-  change (AST.typesize Tfloat) with 8;
   omega.
 Qed.
 
@@ -414,7 +425,7 @@ Proof.
   intros. exploit gss_index_contains_base; eauto. intros [v' [A B]].
   assert (v' = v).
     destruct v; destruct (type_of_index idx); simpl in *;
-    try contradiction; auto. rewrite Floats.Float.singleoffloat_of_single in B; auto. 
+    try contradiction; auto.
   subst v'. auto.
 Qed.
 
@@ -479,11 +490,7 @@ Proof.
   rewrite size_type_chunk. 
   apply Mem.range_perm_implies with Freeable; auto with mem.
   apply offset_of_index_perm; auto.
-  replace (align_chunk (chunk_of_type (type_of_index idx))) with 4.
-  apply offset_of_index_aligned; auto.
-  assert (type_of_index idx <> Tlong).
-    destruct idx; simpl in *; tauto || congruence.
-  destruct (type_of_index idx); reflexivity || congruence. 
+  apply offset_of_index_aligned_2; auto.
   exists m'; auto.
 Qed.
 
@@ -514,7 +521,8 @@ Proof.
   intros. exploit gss_index_contains_base; eauto. intros [v'' [A B]].
   exists v''; split; auto.
   inv H2; destruct (type_of_index idx); simpl in *; try contradiction; subst; auto.
-  rewrite Floats.Float.singleoffloat_of_single; auto. 
+  econstructor; eauto.
+  econstructor; eauto.
   econstructor; eauto.
 Qed.
 
@@ -529,6 +537,8 @@ Proof.
   exists v''; split; auto.
   inv H1; destruct (type_of_index idx); simpl in *; try contradiction; subst; auto. 
   econstructor; eauto.
+  econstructor; eauto.
+  econstructor; eauto.
 Qed.
 
 Lemma gso_index_contains_inj:
@@ -576,11 +586,7 @@ Proof.
   rewrite size_type_chunk.
   apply Mem.range_perm_implies with Freeable; auto with mem.
   apply offset_of_index_perm; auto. 
-  replace (align_chunk (chunk_of_type (type_of_index idx))) with 4. 
-  apply offset_of_index_aligned.
-  assert (type_of_index idx <> Tlong).
-    destruct idx; simpl in *; tauto || congruence.
-  destruct (type_of_index idx); reflexivity || congruence. 
+  apply offset_of_index_aligned_2; auto.
   intros [v C]. 
   exists v; split; auto. constructor; auto. 
 Qed.
@@ -844,18 +850,17 @@ Lemma agree_frame_set_local:
   forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true ->
-  Val.has_type v ty ->
   val_inject j v v' ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_local ofs ty)) v' = Some m'' ->
   agree_frame j (Locmap.set (S Local ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H4.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H3.
   constructor; auto; intros.
 (* local *)
   unfold Locmap.set.
   destruct (Loc.eq (S Local ofs ty) (S Local ofs0 ty0)).
-  inv e. eapply gss_index_contains_inj; eauto with stacking.
+  inv e. eapply gss_index_contains_inj'; eauto with stacking.
   destruct (Loc.diff_dec (S Local ofs ty) (S Local ofs0 ty0)).
   eapply gso_index_contains_inj. eauto. eauto with stacking. eauto. 
   simpl. simpl in d. intuition.
@@ -884,20 +889,19 @@ Lemma agree_frame_set_outgoing:
   forall j ls ls0 m sp m' sp' parent retaddr ofs ty v v' m'',
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true ->
-  Val.has_type v ty ->
   val_inject j v v' ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_arg ofs ty)) v' = Some m'' ->
   agree_frame j (Locmap.set (S Outgoing ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H4.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H3.
   constructor; auto; intros.
 (* local *)
   rewrite Locmap.gso. eapply gso_index_contains_inj; eauto with stacking. red; auto.
   red; left; congruence.
 (* outgoing *)
   unfold Locmap.set. destruct (Loc.eq (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
-  inv e. eapply gss_index_contains_inj; eauto with stacking.
+  inv e. eapply gss_index_contains_inj'; eauto with stacking.
   destruct (Loc.diff_dec (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
   eapply gso_index_contains_inj; eauto with stacking.
   red. red in d. intuition. 
@@ -1145,12 +1149,6 @@ Proof.
   apply check_mreg_list_incl; compute; auto.
 Qed.
 
-Remark destroyed_by_move_at_function_entry:
-  incl (destroyed_by_op Omove) destroyed_at_function_entry.
-Proof.
-  apply check_mreg_list_incl; compute; auto.
-Qed.
-
 Remark temp_for_parent_frame_caller_save:
   In temp_for_parent_frame destroyed_at_call.
 Proof.
@@ -1164,6 +1162,12 @@ Hint Resolve destroyed_by_op_caller_save destroyed_by_load_caller_save
     destroyed_by_cond_caller_save destroyed_by_jumptable_caller_save
     destroyed_at_function_entry_caller_save: stacking.
 
+Remark destroyed_by_setstack_function_entry:
+  forall ty, incl (destroyed_by_setstack ty) destroyed_at_function_entry.
+Proof.
+  destruct ty; apply check_mreg_list_incl; compute; auto. 
+Qed.
+
 Remark transl_destroyed_by_op:
   forall op e, destroyed_by_op (transl_op e op) = destroyed_by_op op.
 Proof.
@@ -1367,12 +1371,12 @@ Lemma save_callee_save_correct:
   /\ agree_regs j ls rs'.
 Proof.
   intros.
-  assert (UNDEF: forall r, In r (destroyed_by_op Omove) -> ls (R r) = Vundef).
-    intros. unfold ls. apply LTL_undef_regs_same. apply destroyed_by_move_at_function_entry; auto.
+  assert (UNDEF: forall r ty, In r (destroyed_by_setstack ty) -> ls (R r) = Vundef).
+    intros. unfold ls. apply LTL_undef_regs_same. eapply destroyed_by_setstack_function_entry; eauto.
   exploit (save_callee_save_regs_correct 
              fe_num_int_callee_save
              index_int_callee_save
-             FI_saved_int Tint
+             FI_saved_int Tany32
              j cs fb sp int_callee_save_regs ls).
   intros. apply index_int_callee_save_inj; auto. 
   intros. simpl. split. apply Zge_le. apply index_int_callee_save_pos; auto. assumption.
@@ -1380,8 +1384,7 @@ Proof.
   intros; congruence.
   intros; simpl. destruct idx; auto. congruence.
   intros. apply int_callee_save_type. auto.
-Local Transparent destroyed_by_setstack.
-  simpl. tauto.
+  eauto.
   auto.
   apply incl_refl. 
   apply int_callee_save_norepet.
@@ -1391,7 +1394,7 @@ Local Transparent destroyed_by_setstack.
   exploit (save_callee_save_regs_correct 
              fe_num_float_callee_save
              index_float_callee_save
-             FI_saved_float Tfloat
+             FI_saved_float Tany64
              j cs fb sp float_callee_save_regs ls).
   intros. apply index_float_callee_save_inj; auto. 
   intros. simpl. split. apply Zge_le. apply index_float_callee_save_pos; auto. assumption.
@@ -1399,7 +1402,7 @@ Local Transparent destroyed_by_setstack.
   intros; congruence.
   intros; simpl. destruct idx; auto. congruence.
   intros. apply float_callee_save_type. auto.
-  simpl. tauto. 
+  eauto.
   auto. 
   apply incl_refl. 
   apply float_callee_save_norepet.
@@ -1462,6 +1465,16 @@ Proof.
   left. apply Plt_ne; auto.
 Qed.
 
+Lemma undef_regs_type:
+  forall ty l rl ls,
+  Val.has_type (ls l) ty -> Val.has_type (LTL.undef_regs rl ls l) ty.
+Proof.
+  induction rl; simpl; intros.
+- auto.
+- unfold Locmap.set. destruct (Loc.eq (R a) l). red; auto. 
+  destruct (Loc.diff_dec (R a) l); auto. red; auto.
+Qed.
+
 (** As a corollary of the previous lemmas, we obtain the following
   correctness theorem for the execution of a function prologue
   (allocation of the frame + saving of the link and return address +
@@ -1732,7 +1745,7 @@ Proof.
              fe_num_int_callee_save
              index_int_callee_save
              FI_saved_int
-             Tint
+             Tany32
              int_callee_save_regs
              j cs fb sp' ls0 m'); auto.
   intros. unfold mreg_within_bounds. split; intros.
@@ -1750,7 +1763,7 @@ Proof.
              fe_num_float_callee_save
              index_float_callee_save
              FI_saved_float
-             Tfloat
+             Tany64
              float_callee_save_regs
              j cs fb sp' ls0 m'); auto.
   intros. unfold mreg_within_bounds. split; intros.
@@ -2318,8 +2331,8 @@ Proof.
   unfold parent_sp.
   assert (slot_valid f Outgoing pos ty = true).
     exploit loc_arguments_acceptable; eauto. intros [A B]. 
-    unfold slot_valid. unfold proj_sumbool. rewrite zle_true. 
-    destruct ty; reflexivity || congruence. omega.
+    unfold slot_valid. unfold proj_sumbool. rewrite zle_true by omega.
+    destruct ty; auto; congruence.
   assert (slot_within_bounds (function_bounds f) Outgoing pos ty).
     eauto.
   exploit agree_outgoing; eauto. intros [v [A B]].
@@ -2525,7 +2538,7 @@ Proof.
   apply agree_frame_set_reg; auto.
 
 - (* Lsetstack *)
-  exploit wt_state_setstack; eauto. intros (VTY & SV & SW). 
+  exploit wt_state_setstack; eauto. intros (SV & SW). 
   set (idx := match sl with
               | Local => FI_local ofs ty
               | Incoming => FI_link (*dummy*)
@@ -2552,15 +2565,15 @@ Proof.
     omega.
   apply match_stacks_change_mach_mem with m'; auto.
   eauto with mem. eauto with mem. intros. rewrite <- H1; eapply Mem.load_store_other; eauto. left; apply Plt_ne; auto. 
-  eauto. eauto. auto. 
+  eauto. eauto. 
   apply agree_regs_set_slot. apply agree_regs_undef_regs; auto. 
   destruct sl.
   + eapply agree_frame_set_local. eapply agree_frame_undef_locs; eauto.
-    apply destroyed_by_setstack_caller_save. auto. auto. auto. apply AGREGS. 
+    apply destroyed_by_setstack_caller_save. auto. auto. auto.
     assumption. 
   + simpl in SW; discriminate.
   + eapply agree_frame_set_outgoing. eapply agree_frame_undef_locs; eauto.
-    apply destroyed_by_setstack_caller_save. auto. auto. auto. apply AGREGS.
+    apply destroyed_by_setstack_caller_save. auto. auto. auto.
     assumption.
   + eauto with coqlib.
 
@@ -2613,13 +2626,15 @@ Proof.
   apply plus_one. econstructor. 
   instantiate (1 := a'). rewrite <- A. apply eval_addressing_preserved. exact symbols_preserved.
   eexact C. eauto.
-  econstructor; eauto with coqlib.
+  econstructor. eauto.
   eapply match_stacks_parallel_stores. eexact MINJ. eexact B. eauto. eauto. auto. 
+  eauto. eauto. 
   rewrite transl_destroyed_by_store. 
   apply agree_regs_undef_regs; auto. 
   apply agree_frame_undef_locs; auto.
   eapply agree_frame_parallel_stores; eauto.
-  apply destroyed_by_store_caller_save. 
+  apply destroyed_by_store_caller_save.
+  eauto with coqlib.
 
 - (* Lcall *)
   exploit find_function_translated; eauto. intros [bf [tf' [A [B C]]]].
diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v
index e293028..813944d 100644
--- a/backend/ValueAnalysis.v
+++ b/backend/ValueAnalysis.v
@@ -166,7 +166,7 @@ Definition store_init_data (ab: ablock) (p: Z) (id: init_data) : ablock :=
   | Init_int32 n => ablock_store Mint32 ab p (I n)
   | Init_int64 n => ablock_store Mint64 ab p (L n)
   | Init_float32 n => ablock_store Mfloat32 ab p
-                        (if propagate_float_constants tt then F n else ftop)
+                        (if propagate_float_constants tt then FS n else ftop)
   | Init_float64 n => ablock_store Mfloat64 ab p
                         (if propagate_float_constants tt then F n else ftop)
   | Init_addrof symb ofs => ablock_store Mint32 ab p (Ptr (Gl symb ofs))
@@ -922,7 +922,7 @@ Proof.
     eapply VMTOP; eauto.
   - exploit BC'INV; eauto. intros (b'' & delta & J'). 
     exploit Mem.loadbytes_inject. eexact IMEM. eauto. eauto. intros (bytes & A & B).
-    inv B. inv H3. eapply PMTOP; eauto. 
+    inv B. inv H3. inv H7. eapply PMTOP; eauto. 
   }
   (* Conclusions *)
   exists bc'; splitall.
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index c8431bb..75dd9d2 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -509,13 +509,13 @@ Inductive aval : Type :=
   | Sgn (n: Z)
   | L (n: int64)
   | F (f: float)
-  | Fsingle
+  | FS (f: float32)
   | Ptr (p: aptr)
   | Ifptr (p: aptr).
 
 Definition eq_aval: forall (v1 v2: aval), {v1=v2} + {v1<>v2}.
 Proof.
-  intros. generalize zeq Int.eq_dec Int64.eq_dec Float.eq_dec eq_aptr; intros.
+  intros. generalize zeq Int.eq_dec Int64.eq_dec Float.eq_dec Float32.eq_dec eq_aptr; intros.
   decide equality. 
 Defined.
 
@@ -537,14 +537,14 @@ Inductive vmatch : val -> aval -> Prop :=
   | vmatch_Sgn_undef: forall n, vmatch Vundef (Sgn n)
   | vmatch_l: forall i, vmatch (Vlong i) (L i)
   | vmatch_f: forall f, vmatch (Vfloat f) (F f)
-  | vmatch_single: forall f, Float.is_single f -> vmatch (Vfloat f) Fsingle
-  | vmatch_single_undef: vmatch Vundef Fsingle
+  | vmatch_s: forall f, vmatch (Vsingle f) (FS f)
   | vmatch_ptr: forall b ofs p, pmatch b ofs p -> vmatch (Vptr b ofs) (Ptr p)
   | vmatch_ptr_undef: forall p, vmatch Vundef (Ptr p)
   | vmatch_ifptr_undef: forall p, vmatch Vundef (Ifptr p)
   | vmatch_ifptr_i: forall i p, vmatch (Vint i) (Ifptr p)
   | vmatch_ifptr_l: forall i p, vmatch (Vlong i) (Ifptr p)
   | vmatch_ifptr_f: forall f p, vmatch (Vfloat f) (Ifptr p)
+  | vmatch_ifptr_s: forall f p, vmatch (Vsingle f) (Ifptr p)
   | vmatch_ifptr_p: forall b ofs p, pmatch b ofs p -> vmatch (Vptr b ofs) (Ifptr p).
 
 Lemma vmatch_ifptr:
@@ -569,11 +569,14 @@ Proof. constructor. Qed.
 Lemma vmatch_ftop: forall f, vmatch (Vfloat f) ftop.
 Proof. intros; constructor. Qed.
 
+Lemma vmatch_ftop_single: forall f, vmatch (Vsingle f) ftop.
+Proof. intros; constructor. Qed.
+
 Lemma vmatch_undef_ftop: vmatch Vundef ftop.
 Proof. constructor. Qed.    
 
 Hint Constructors vmatch : va.
-Hint Resolve vmatch_itop vmatch_undef_itop vmatch_ftop vmatch_undef_ftop : va.
+Hint Resolve vmatch_itop vmatch_undef_itop vmatch_ftop vmatch_ftop_single vmatch_undef_ftop : va.
 
 (* Some properties about [is_uns] and [is_sgn]. *)
 
@@ -770,22 +773,21 @@ Inductive vge: aval -> aval -> Prop :=
   | vge_i: forall i, vge (I i) (I i)
   | vge_l: forall i, vge (L i) (L i)
   | vge_f: forall f, vge (F f) (F f)
+  | vge_s: forall f, vge (FS f) (FS f)
   | vge_uns_i: forall n i, 0 <= n -> is_uns n i -> vge (Uns n) (I i)
   | vge_uns_uns: forall n1 n2, n1 >= n2 -> vge (Uns n1) (Uns n2)
   | vge_sgn_i: forall n i, 0 < n -> is_sgn n i -> vge (Sgn n) (I i)
   | vge_sgn_sgn: forall n1 n2, n1 >= n2 -> vge (Sgn n1) (Sgn n2)
   | vge_sgn_uns: forall n1 n2, n1 > n2 -> vge (Sgn n1) (Uns n2)
-  | vge_single_f: forall f, Float.is_single f -> vge Fsingle (F f)
-  | vge_single: vge Fsingle Fsingle
   | vge_p_p: forall p q, pge p q -> vge (Ptr p) (Ptr q)
   | vge_ip_p: forall p q, pge p q -> vge (Ifptr p) (Ptr q)
   | vge_ip_ip: forall p q, pge p q -> vge (Ifptr p) (Ifptr q)
   | vge_ip_i: forall p i, vge (Ifptr p) (I i)
   | vge_ip_l: forall p i, vge (Ifptr p) (L i)
   | vge_ip_f: forall p f, vge (Ifptr p) (F f)
+  | vge_ip_s: forall p f, vge (Ifptr p) (FS f)
   | vge_ip_uns: forall p n, vge (Ifptr p) (Uns n)
-  | vge_ip_sgn: forall p n, vge (Ifptr p) (Sgn n)
-  | vge_ip_single: forall p, vge (Ifptr p) Fsingle.
+  | vge_ip_sgn: forall p n, vge (Ifptr p) (Sgn n).
 
 Hint Constructors vge : va.
 
@@ -836,12 +838,9 @@ Definition vlub (v w: aval) : aval :=
   | Sgn n1, Uns n2 => sgn (Z.max n1 (n2 + 1))
   | Sgn n1, Sgn n2 => sgn (Z.max n1 n2)
   | F f1, F f2 =>
-      if Float.eq_dec f1 f2 then v else
-      if Float.is_single_dec f1 && Float.is_single_dec f2 then Fsingle else ftop
-  | F f, Fsingle | Fsingle, F f =>
-      if Float.is_single_dec f then Fsingle else ftop
-  | Fsingle, Fsingle =>
-      Fsingle
+      if Float.eq_dec f1 f2 then v else ftop
+  | FS f1, FS f2 =>
+      if Float32.eq_dec f1 f2 then v else ftop
   | L i1, L i2 =>
       if Int64.eq i1 i2 then v else ltop
   | Ptr p1, Ptr p2 => Ptr(plub p1 p2)
@@ -865,8 +864,8 @@ Proof.
 - f_equal; apply Z.max_comm.
 - f_equal; apply Z.max_comm.
 - rewrite Int64.eq_sym. predSpec Int64.eq Int64.eq_spec n0 n; congruence.
-- rewrite dec_eq_sym. destruct (Float.eq_dec f0 f). congruence. 
-  rewrite andb_comm. auto.
+- rewrite dec_eq_sym. destruct (Float.eq_dec f0 f). congruence. auto.
+- rewrite dec_eq_sym. destruct (Float32.eq_dec f0 f). congruence. auto.
 - f_equal; apply plub_comm.
 - f_equal; apply plub_comm.
 - f_equal; apply plub_comm.
@@ -937,12 +936,8 @@ Proof.
 - eapply vge_trans. apply vge_sgn_sgn'. eauto with va.
 - eapply vge_trans. apply vge_sgn_sgn'. eauto with va.
 - destruct (Int64.eq n n0); constructor.
-- destruct (Float.eq_dec f f0). constructor.
-  destruct (Float.is_single_dec f && Float.is_single_dec f0) eqn:FS.
-  InvBooleans. auto with va.
-  constructor.
-- destruct (Float.is_single_dec f); constructor; auto. 
-- destruct (Float.is_single_dec f); constructor; auto.
+- destruct (Float.eq_dec f f0); constructor.
+- destruct (Float32.eq_dec f f0); constructor.
 - constructor; apply pge_lub_l; auto.
 - constructor; apply pge_lub_l; auto.
 - constructor; apply pge_lub_l; auto.
@@ -1043,8 +1038,7 @@ Definition vincl (v w: aval) : bool :=
   | Sgn n, Sgn m => zle n m
   | L i, L j => Int64.eq_dec i j
   | F i, F j => Float.eq_dec i j
-  | F i, Fsingle => Float.is_single_dec i
-  | Fsingle, Fsingle => true
+  | FS i, FS j => Float32.eq_dec i j
   | Ptr p, Ptr q => pincl p q
   | Ptr p, Ifptr q => pincl p q
   | Ifptr p, Ifptr q => pincl p q
@@ -1063,7 +1057,7 @@ Proof.
   InvBooleans. constructor; auto with va.
   InvBooleans. subst; auto with va.
   InvBooleans. subst; auto with va.
-  InvBooleans. auto with va.
+  InvBooleans. subst; auto with va.
   constructor; apply pincl_ge; auto.
   constructor; apply pincl_ge; auto.
   constructor; apply pincl_ge; auto.
@@ -1154,6 +1148,28 @@ Proof.
   intros. unfold binop_float; inv H; auto with va; inv H0; auto with va.
 Qed.
 
+Definition unop_single (sem: float32 -> float32) (x: aval) :=
+  match x with FS n => FS (sem n) | _ => ftop end.
+
+Lemma unop_single_sound:
+  forall sem v x,
+  vmatch v x ->
+  vmatch (match v with Vsingle i => Vsingle(sem i) | _ => Vundef end) (unop_single sem x).
+Proof.
+  intros. unfold unop_single; inv H; auto with va.
+Qed.
+
+Definition binop_single (sem: float32 -> float32 -> float32) (x y: aval) :=
+  match x, y with FS n, FS m => FS (sem n m) | _, _ => ftop end.
+
+Lemma binop_single_sound:
+  forall sem v x w y,
+  vmatch v x -> vmatch w y ->
+  vmatch (match v, w with Vsingle i, Vsingle j => Vsingle(sem i j) | _, _ => Vundef end) (binop_single sem x y).
+Proof.
+  intros. unfold binop_single; inv H; auto with va; inv H0; auto with va.
+Qed.
+
 (** Logical operations *)
 
 Definition shl (v w: aval) :=
@@ -1636,6 +1652,42 @@ Lemma divf_sound:
   forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.divf v w) (divf x y).
 Proof (binop_float_sound Float.div).
 
+Definition negfs := unop_single Float32.neg.
+
+Lemma negfs_sound: 
+  forall v x, vmatch v x -> vmatch (Val.negfs v) (negfs x).
+Proof (unop_single_sound Float32.neg).
+
+Definition absfs := unop_single Float32.abs.
+
+Lemma absfs_sound: 
+  forall v x, vmatch v x -> vmatch (Val.absfs v) (absfs x).
+Proof (unop_single_sound Float32.abs).
+
+Definition addfs := binop_single Float32.add.
+
+Lemma addfs_sound: 
+  forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.addfs v w) (addfs x y).
+Proof (binop_single_sound Float32.add).
+
+Definition subfs := binop_single Float32.sub.
+
+Lemma subfs_sound: 
+  forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.subfs v w) (subfs x y).
+Proof (binop_single_sound Float32.sub).
+
+Definition mulfs := binop_single Float32.mul.
+
+Lemma mulfs_sound: 
+  forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.mulfs v w) (mulfs x y).
+Proof (binop_single_sound Float32.mul).
+
+Definition divfs := binop_single Float32.div.
+
+Lemma divfs_sound: 
+  forall v x w y, vmatch v x -> vmatch w y -> vmatch (Val.divfs v w) (divfs x y).
+Proof (binop_single_sound Float32.div).
+
 (** Conversions *)
 
 Definition zero_ext (nbits: Z) (v: aval) :=
@@ -1683,23 +1735,38 @@ Qed.
 
 Definition singleoffloat (v: aval) :=
   match v with
-  | F f => F (Float.singleoffloat f)
-  | _   => Fsingle
+  | F f => FS (Float.to_single f)
+  | _   => ftop
   end.
 
 Lemma singleoffloat_sound:
   forall v x, vmatch v x -> vmatch (Val.singleoffloat v) (singleoffloat x).
 Proof.
-  intros. 
-  assert (DEFAULT: vmatch (Val.singleoffloat v) Fsingle).
-  { destruct v; constructor. apply Float.singleoffloat_is_single. }
+  intros.  
+  assert (DEFAULT: vmatch (Val.singleoffloat v) ftop).
+  { destruct v; constructor. }
+  destruct x; auto. inv H. constructor. 
+Qed.
+
+Definition floatofsingle (v: aval) :=
+  match v with
+  | FS f => F (Float.of_single f)
+  | _   => ftop
+  end.
+
+Lemma floatofsingle_sound:
+  forall v x, vmatch v x -> vmatch (Val.floatofsingle v) (floatofsingle x).
+Proof.
+  intros.  
+  assert (DEFAULT: vmatch (Val.floatofsingle v) ftop).
+  { destruct v; constructor. }
   destruct x; auto. inv H. constructor. 
 Qed.
 
 Definition intoffloat (x: aval) :=
   match x with
   | F f =>
-      match Float.intoffloat f with
+      match Float.to_int f with
       | Some i => I i
       | None => if va_strict tt then Vbot else itop
       end
@@ -1710,14 +1777,14 @@ Lemma intoffloat_sound:
   forall v x w, vmatch v x -> Val.intoffloat v = Some w -> vmatch w (intoffloat x).
 Proof.
   unfold Val.intoffloat; intros. destruct v; try discriminate. 
-  destruct (Float.intoffloat f) as [i|] eqn:E; simpl in H0; inv H0.
+  destruct (Float.to_int f) as [i|] eqn:E; simpl in H0; inv H0.
   inv H; simpl; auto with va. rewrite E; constructor. 
 Qed.
 
 Definition intuoffloat (x: aval) :=
   match x with
   | F f =>
-      match Float.intuoffloat f with
+      match Float.to_intu f with
       | Some i => I i
       | None => if va_strict tt then Vbot else itop
       end
@@ -1728,13 +1795,13 @@ Lemma intuoffloat_sound:
   forall v x w, vmatch v x -> Val.intuoffloat v = Some w -> vmatch w (intuoffloat x).
 Proof.
   unfold Val.intuoffloat; intros. destruct v; try discriminate. 
-  destruct (Float.intuoffloat f) as [i|] eqn:E; simpl in H0; inv H0.
+  destruct (Float.to_intu f) as [i|] eqn:E; simpl in H0; inv H0.
   inv H; simpl; auto with va. rewrite E; constructor. 
 Qed.
 
 Definition floatofint (x: aval) :=
   match x with
-  | I i => F(Float.floatofint i)
+  | I i => F(Float.of_int i)
   | _   => ftop
   end.
 
@@ -1747,7 +1814,7 @@ Qed.
 
 Definition floatofintu (x: aval) :=
   match x with
-  | I i => F(Float.floatofintu i)
+  | I i => F(Float.of_intu i)
   | _   => ftop
   end.
 
@@ -1758,6 +1825,68 @@ Proof.
   inv H; simpl; auto with va.
 Qed.
 
+Definition intofsingle (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_int f with
+      | Some i => I i
+      | None => if va_strict tt then Vbot else itop
+      end
+  | _ => itop
+  end.
+
+Lemma intofsingle_sound:
+  forall v x w, vmatch v x -> Val.intofsingle v = Some w -> vmatch w (intofsingle x).
+Proof.
+  unfold Val.intofsingle; intros. destruct v; try discriminate. 
+  destruct (Float32.to_int f) as [i|] eqn:E; simpl in H0; inv H0.
+  inv H; simpl; auto with va. rewrite E; constructor. 
+Qed.
+
+Definition intuofsingle (x: aval) :=
+  match x with
+  | FS f =>
+      match Float32.to_intu f with
+      | Some i => I i
+      | None => if va_strict tt then Vbot else itop
+      end
+  | _ => itop
+  end.
+
+Lemma intuofsingle_sound:
+  forall v x w, vmatch v x -> Val.intuofsingle v = Some w -> vmatch w (intuofsingle x).
+Proof.
+  unfold Val.intuofsingle; intros. destruct v; try discriminate. 
+  destruct (Float32.to_intu f) as [i|] eqn:E; simpl in H0; inv H0.
+  inv H; simpl; auto with va. rewrite E; constructor. 
+Qed.
+
+Definition singleofint (x: aval) :=
+  match x with
+  | I i => FS(Float32.of_int i)
+  | _   => ftop
+  end.
+
+Lemma singleofint_sound:
+  forall v x w, vmatch v x -> Val.singleofint v = Some w -> vmatch w (singleofint x).
+Proof.
+  unfold Val.singleofint; intros. destruct v; inv H0.
+  inv H; simpl; auto with va.
+Qed.
+
+Definition singleofintu (x: aval) :=
+  match x with
+  | I i => FS(Float32.of_intu i)
+  | _   => ftop
+  end.
+
+Lemma singleofintu_sound:
+  forall v x w, vmatch v x -> Val.singleofintu v = Some w -> vmatch w (singleofintu x).
+Proof.
+  unfold Val.singleofintu; intros. destruct v; inv H0.
+  inv H; simpl; auto with va.
+Qed.
+
 Definition floatofwords (x y: aval) :=
   match x, y with
   | I i, I j => F(Float.from_words i j)
@@ -1862,6 +1991,18 @@ Proof.
   intros. inv H; try constructor; inv H0; constructor. 
 Qed.
 
+Definition cmpfs_bool (c: comparison) (v w: aval) : abool :=
+  match v, w with
+  | FS f1, FS f2 => Just (Float32.cmp c f1 f2)
+  | _, _ => Btop
+  end.
+
+Lemma cmpfs_bool_sound:
+  forall c v w x y, vmatch v x -> vmatch w y -> cmatch (Val.cmpfs_bool c v w) (cmpfs_bool c x y).
+Proof.
+  intros. inv H; try constructor; inv H0; constructor. 
+Qed.
+
 Definition maskzero (x: aval) (mask: int) : abool :=
   match x with
   | I i => Just (Int.eq (Int.and i mask) Int.zero)
@@ -1939,9 +2080,10 @@ Definition vnormalize (chunk: memory_chunk) (v: aval) :=
   | Mint16unsigned, _ => Uns 16
   | Mint32, (I _ | Ptr _ | Ifptr _) => v
   | Mint64, L _ => v
-  | Mfloat32, F f => F (Float.singleoffloat f)
-  | Mfloat32, _ => Fsingle
+  | Mfloat32, FS f => v
   | Mfloat64, F f => v
+  | Many32, (I _ | Ptr _ | Ifptr _ | FS _) => v
+  | Many64, _ => v
   | _, _ => Ifptr Pbot
   end.
 
@@ -1963,12 +2105,10 @@ Proof.
 - constructor. omega. apply is_zero_ext_uns; auto with va.
 - constructor. xomega. apply is_sign_ext_sgn; auto with va. apply Z.min_case; auto with va.
 - constructor. omega. apply is_zero_ext_uns; auto with va.
-- constructor. apply Float.singleoffloat_is_single.
 - constructor. omega. apply is_sign_ext_sgn; auto with va.
 - constructor. omega. apply is_zero_ext_uns; auto with va.
 - constructor. omega. apply is_sign_ext_sgn; auto with va.
 - constructor. omega. apply is_zero_ext_uns; auto with va.
-- constructor. apply Float.singleoffloat_is_single.
 Qed.
 
 Lemma vnormalize_cast:
@@ -1992,9 +2132,13 @@ Proof.
 - (* int64 *)
   destruct v; try contradiction; constructor.
 - (* float32 *)
-  rewrite H2. destruct v; simpl; constructor. apply Float.singleoffloat_is_single. 
+  destruct v; try contradiction; constructor.
 - (* float64 *)
   destruct v; try contradiction; constructor.
+- (* any32 *)
+  auto.
+- (* any64 *)
+  auto.
 Qed.
 
 Lemma vnormalize_monotone:
@@ -2024,12 +2168,10 @@ Proof.
 - constructor; auto with va. apply is_zero_ext_uns; auto with va.
 - destruct (zlt n2 8); constructor; auto with va.
 - destruct (zlt n2 16); constructor; auto with va.
-- constructor. apply Float.singleoffloat_is_single.
 - constructor; auto with va. apply is_sign_ext_sgn; auto with va.
 - constructor; auto with va. apply is_zero_ext_uns; auto with va.
 - constructor; auto with va. apply is_sign_ext_sgn; auto with va.
 - constructor; auto with va. apply is_zero_ext_uns; auto with va.
-- constructor. apply Float.singleoffloat_is_single.
 - destruct (zlt n 8); constructor; auto with va.
 - destruct (zlt n 16); constructor; auto with va.
 Qed.
@@ -2064,6 +2206,8 @@ Definition chunk_compat (chunk chunk': memory_chunk) : bool :=
   | Mfloat32, Mfloat32 => true
   | Mint64, Mint64 => true
   | Mfloat64, Mfloat64 => true
+  | Many32, Many32 => true
+  | Many64, Many64 => true
   | _, _ => false
   end.
 
@@ -2148,7 +2292,7 @@ Definition ablock_storebytes_anywhere (ab: ablock) (p: aptr) :=
 
 Definition smatch (m: mem) (b: block) (p: aptr) : Prop :=
   (forall chunk ofs v, Mem.load chunk m b ofs = Some v -> vmatch v (Ifptr p))
-/\(forall ofs b' ofs' i, Mem.loadbytes m b ofs 1 = Some (Pointer b' ofs' i :: nil) -> pmatch b' ofs' p).
+/\(forall ofs b' ofs' q i, Mem.loadbytes m b ofs 1 = Some (Fragment (Vptr b' ofs') q i :: nil) -> pmatch b' ofs' p).
 
 Remark loadbytes_load_ext:
   forall b m m',
@@ -2219,10 +2363,10 @@ Proof.
 Qed.
 
 Lemma smatch_loadbytes:
-  forall m b p b' ofs' i n ofs bytes,
+  forall m b p b' ofs' q i n ofs bytes,
   Mem.loadbytes m b ofs n = Some bytes ->
   smatch m b p ->
-  In (Pointer b' ofs' i) bytes ->
+  In (Fragment (Vptr b' ofs') q i) bytes ->
   pmatch b' ofs' p.
 Proof.
   intros. exploit In_loadbytes; eauto. intros (ofs1 & A & B).
@@ -2251,11 +2395,11 @@ Proof.
 Qed.
 
 Lemma storebytes_provenance:
-  forall m b ofs bytes m' b' ofs' b'' ofs'' i,
+  forall m b ofs bytes m' b' ofs' b'' ofs'' q i,
   Mem.storebytes m b ofs bytes = Some m' ->
-  Mem.loadbytes m' b' ofs' 1 = Some (Pointer b'' ofs'' i :: nil) ->
-  In (Pointer b'' ofs'' i) bytes
-  \/ Mem.loadbytes m b' ofs' 1 = Some (Pointer b'' ofs'' i :: nil).
+  Mem.loadbytes m' b' ofs' 1 = Some (Fragment (Vptr b'' ofs'') q i :: nil) ->
+  In (Fragment (Vptr b'' ofs'') q i) bytes
+  \/ Mem.loadbytes m b' ofs' 1 = Some (Fragment (Vptr b'' ofs'') q i :: nil).
 Proof.
   intros. 
   assert (EITHER:
@@ -2275,26 +2419,24 @@ Proof.
 Qed.
 
 Lemma store_provenance:
-  forall chunk m b ofs v m' b' ofs' b'' ofs'' i,
+  forall chunk m b ofs v m' b' ofs' b'' ofs'' q i,
   Mem.store chunk m b ofs v = Some m' ->
-  Mem.loadbytes m' b' ofs' 1 = Some (Pointer b'' ofs'' i :: nil) ->
-  v = Vptr b'' ofs'' /\ chunk = Mint32
-  \/ Mem.loadbytes m b' ofs' 1 = Some (Pointer b'' ofs'' i :: nil).
+  Mem.loadbytes m' b' ofs' 1 = Some (Fragment (Vptr b'' ofs'') q i :: nil) ->
+  v = Vptr b'' ofs'' /\ (chunk = Mint32 \/ chunk = Many32 \/ chunk = Many64)
+  \/ Mem.loadbytes m b' ofs' 1 = Some (Fragment (Vptr b'' ofs'') q i :: nil).
 Proof.
   intros. exploit storebytes_provenance; eauto. eapply Mem.store_storebytes; eauto. 
   intros [A|A]; auto. left.
-  assert (IN_ENC_BYTES: forall bl, ~In (Pointer b'' ofs'' i) (inj_bytes bl)).
-  {
-    induction bl; simpl. tauto. red; intros; elim IHbl. destruct H1. congruence. auto. 
-  }
-  assert (IN_REP_UNDEF: forall n, ~In (Pointer b'' ofs'' i) (list_repeat n Undef)).
-  {
-    intros; red; intros. exploit in_list_repeat; eauto. congruence.
-  }
-  unfold encode_val in A; destruct chunk, v;
-  try (eelim IN_ENC_BYTES; eassumption);
-  try (eelim IN_REP_UNDEF; eassumption).
-  simpl in A. split; auto. intuition congruence.
+  generalize (encode_val_shape chunk v). intros ENC; inv ENC.
+- split; auto. rewrite <- H1 in A; destruct A.
+  + congruence.
+  + exploit H5; eauto. intros (j & P & Q); congruence.
+- rewrite <- H1 in A; destruct A. 
+  + congruence.
+  + exploit H3; eauto. intros [byte P]; congruence. 
+- rewrite <- H1 in A; destruct A.
+  + congruence.
+  + exploit H2; eauto. congruence.
 Qed.
 
 Lemma smatch_store:
@@ -2307,7 +2449,7 @@ Proof.
   intros. destruct H0 as [A B]. split.
 - intros chunk' ofs' v' LOAD. destruct v'; auto with va.
   exploit Mem.load_pointer_store; eauto. 
-  intros [(P & Q & R & S & T) | DISJ]. 
+  intros [(P & Q & R & S) | DISJ]. 
 + subst. apply vmatch_vplub_l. auto.
 + apply vmatch_vplub_r. apply A with (chunk := chunk') (ofs := ofs').
   rewrite <- LOAD. symmetry. eapply Mem.load_store_other; eauto.
@@ -2325,17 +2467,20 @@ Lemma smatch_storebytes:
   forall m b ofs bytes m' b' p p',
   Mem.storebytes m b ofs bytes = Some m' ->
   smatch m b' p ->
-  (forall b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' p') ->
+  (forall b' ofs' q i, In (Fragment (Vptr b' ofs') q i) bytes -> pmatch b' ofs' p') ->
   smatch m' b' (plub p' p).
 Proof.
   intros. destruct H0 as [A B]. split.
 - intros. apply vmatch_ifptr. intros bx ofsx EQ; subst v. 
   exploit Mem.load_loadbytes; eauto. intros (bytes' & P & Q).
-  exploit decode_val_pointer_inv; eauto. intros [U V]. 
-  subst chunk bytes'. 
-  exploit In_loadbytes; eauto.
-  instantiate (1 := Pointer bx ofsx 3%nat). simpl; auto.
-  intros (ofs' & X & Y).
+  destruct bytes' as [ | byte1' bytes']. 
+  exploit Mem.loadbytes_length; eauto. intros. destruct chunk; discriminate.
+  generalize (decode_val_shape chunk byte1' bytes'). rewrite <- Q. 
+  intros DEC; inv DEC; try contradiction.
+  assert (v = Vptr bx ofsx).
+  { destruct H5 as [E|[E|E]]; rewrite E in H4; destruct v; simpl in H4; congruence. }
+  exploit In_loadbytes; eauto. eauto with coqlib. 
+  intros (ofs' & X & Y). subst v. 
   exploit storebytes_provenance; eauto. intros [Z | Z].
   apply pmatch_lub_l. eauto. 
   apply pmatch_lub_r. eauto. 
@@ -2530,10 +2675,10 @@ Proof.
 Qed.
 
 Lemma ablock_loadbytes_sound:
-  forall m b ab b' ofs' i n ofs bytes,
+  forall m b ab b' ofs' q i n ofs bytes,
   Mem.loadbytes m b ofs n = Some bytes ->
   bmatch m b ab ->
-  In (Pointer b' ofs' i) bytes ->
+  In (Fragment (Vptr b' ofs') q i) bytes ->
   pmatch b' ofs' (ablock_loadbytes ab).
 Proof.
   intros. destruct H0. eapply smatch_loadbytes; eauto. 
@@ -2542,7 +2687,7 @@ Qed.
 Lemma ablock_storebytes_anywhere_sound:
   forall m b ofs bytes p m' b' ab,
   Mem.storebytes m b ofs bytes = Some m' ->
-  (forall b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' p) ->
+  (forall b' ofs' q i, In (Fragment (Vptr b' ofs') q i) bytes -> pmatch b' ofs' p) ->
   bmatch m b' ab ->
   bmatch m' b' (ablock_storebytes_anywhere ab p).
 Proof.
@@ -2566,7 +2711,7 @@ Lemma ablock_storebytes_sound:
   forall m b ofs bytes m' p ab sz,
   Mem.storebytes m b ofs bytes = Some m' ->
   length bytes = nat_of_Z sz ->
-  (forall b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' p) ->
+  (forall b' ofs' q i, In (Fragment (Vptr b' ofs') q i) bytes -> pmatch b' ofs' p) ->
   bmatch m b ab ->
   bmatch m' b (ablock_storebytes ab p ofs sz).
 Proof.
@@ -3060,7 +3205,7 @@ Theorem loadbytes_sound:
   romatch m rm ->
   mmatch m am ->
   pmatch b ofs p ->
-  forall  b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' (loadbytes am rm p).
+  forall  b' ofs' q i, In (Fragment (Vptr b' ofs') q i) bytes -> pmatch b' ofs' (loadbytes am rm p).
 Proof.
   intros. unfold loadbytes; inv H2.
 - (* Gl id ofs *)
@@ -3093,7 +3238,7 @@ Theorem storebytes_sound:
   mmatch m am ->
   pmatch b ofs p ->
   length bytes = nat_of_Z sz ->
-  (forall b' ofs' i, In (Pointer b' ofs' i) bytes -> pmatch b' ofs' q) ->
+  (forall b' ofs' qt i, In (Fragment (Vptr b' ofs') qt i) bytes -> pmatch b' ofs' q) ->
   mmatch m' (storebytes am p sz q).
 Proof.
   intros until q; intros STORE MM PM LENGTH BYTES.
@@ -3403,7 +3548,7 @@ Proof.
     unfold Mem.loadbytes. rewrite pred_dec_true. reflexivity. 
     red; intros. replace ofs0 with ofs by omega. auto.
   }
-  destruct mv; econstructor.
+  destruct mv; econstructor. destruct v; econstructor. 
   apply inj_of_bc_valid.
   assert (PM: pmatch bc b i Ptop).
   { exploit mmatch_top; eauto. intros [P Q].
@@ -3456,7 +3601,7 @@ Proof.
     - exploit Mem.load_inject. eauto. eauto. apply inj_of_bc_valid; auto. 
       intros (v' & A & B). eapply vmatch_inj_top; eauto. 
     - exploit Mem.loadbytes_inject. eauto. eauto. apply inj_of_bc_valid; auto. 
-      intros (bytes' & A & B). inv B. inv H4. eapply pmatch_inj_top; eauto. 
+      intros (bytes' & A & B). inv B. inv H4. inv H8. eapply pmatch_inj_top; eauto. 
   }
   constructor; simpl; intros. 
   - apply ablock_init_sound. apply SM. congruence.
@@ -3701,7 +3846,11 @@ Hint Resolve cnot_sound symbol_address_sound
        divs_sound divu_sound mods_sound modu_sound shrx_sound
        negf_sound absf_sound
        addf_sound subf_sound mulf_sound divf_sound
-       zero_ext_sound sign_ext_sound singleoffloat_sound
+       negfs_sound absfs_sound
+       addfs_sound subfs_sound mulfs_sound divfs_sound
+       zero_ext_sound sign_ext_sound singleoffloat_sound floatofsingle_sound
        intoffloat_sound intuoffloat_sound floatofint_sound floatofintu_sound
+       intofsingle_sound intuofsingle_sound singleofint_sound singleofintu_sound
        longofwords_sound loword_sound hiword_sound 
-       cmpu_bool_sound cmp_bool_sound cmpf_bool_sound maskzero_sound : va.
+       cmpu_bool_sound cmp_bool_sound cmpf_bool_sound cmpfs_bool_sound
+       maskzero_sound : va.
-- 
cgit v1.2.3