From be4d6e42dfa287b93b1a35ec820ab2a5aaf8c7ec Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Sun, 19 May 2013 09:54:40 +0000
Subject: Merge of the float32 branch: - added RTL type "Tsingle" -
 ABI-compatible passing of single-precision floats on ARM and x86

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2260 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 .depend                      |   3 +-
 Makefile                     |   2 +-
 arm/Asmgen.v                 |  36 +-
 arm/Asmgenproof.v            |  11 +-
 arm/Asmgenproof1.v           |  45 ++-
 arm/Machregs.v               |   8 +-
 arm/Op.v                     |  20 +-
 arm/PrintAsm.ml              |  55 ++-
 arm/eabi/Conventions1.v      |  62 +++-
 backend/Allocation.v         | 134 ++++---
 backend/Allocproof.v         | 314 ++++++++++------
 backend/CMtypecheck.ml       |  14 +-
 backend/CSEproof.v           |  33 +-
 backend/IRC.ml               |  12 +-
 backend/LTL.v                |   4 +-
 backend/Linear.v             |   2 +-
 backend/Lineartyping.v       |  54 +--
 backend/Locations.v          |  49 ++-
 backend/Mach.v               |   2 +-
 backend/PrintCminor.ml       |   1 +
 backend/PrintXTL.ml          |   6 +-
 backend/RTLtyping.v          | 834 +++++++++++++++++--------------------------
 backend/Regalloc.ml          |  14 +-
 backend/Stackingproof.v      |  81 +++--
 cfrontend/Cexec.v            |   7 +-
 cfrontend/Ctypes.v           |   2 +
 common/AST.v                 |  73 +++-
 common/Events.v              |  76 ++--
 common/Memdata.v             |   2 +-
 common/PrintAST.ml           |   6 +-
 common/Subtyping.v           | 808 +++++++++++++++++++++++++++++++++++++++++
 common/Values.v              |  45 ++-
 driver/Interp.ml             |   1 +
 ia32/Asm.v                   |  18 +-
 ia32/Asmgen.v                |   6 +
 ia32/Asmgenproof.v           |   4 +
 ia32/Asmgenproof1.v          |  14 +-
 ia32/Machregs.v              |   9 +-
 ia32/Op.v                    |  21 +-
 ia32/PrintAsm.ml             |  12 +-
 ia32/standard/Conventions1.v |   7 +-
 lib/Floats.v                 |  28 ++
 powerpc/Asmgen.v             |   4 +-
 powerpc/Asmgenproof.v        |   5 +-
 powerpc/Machregs.v           |   8 +-
 powerpc/Op.v                 |  21 +-
 powerpc/eabi/Conventions1.v  |  26 +-
 test/c/Makefile              |   4 +-
 test/c/Results/fftsp         |   1 +
 test/c/fftsp.c               | 196 ++++++++++
 50 files changed, 2237 insertions(+), 963 deletions(-)
 create mode 100644 common/Subtyping.v
 create mode 100644 test/c/Results/fftsp
 create mode 100644 test/c/fftsp.c

diff --git a/.depend b/.depend
index 057f4f9..17db282 100644
--- a/.depend
+++ b/.depend
@@ -25,6 +25,7 @@ common/Smallstep.vo common/Smallstep.glob common/Smallstep.v.beautified: common/
 common/Behaviors.vo common/Behaviors.glob common/Behaviors.v.beautified: common/Behaviors.v lib/Coqlib.vo common/Events.vo common/Globalenvs.vo lib/Integers.vo common/Smallstep.vo
 common/Switch.vo common/Switch.glob common/Switch.v.beautified: common/Switch.v lib/Coqlib.vo lib/Maps.vo lib/Integers.vo
 common/Determinism.vo common/Determinism.glob common/Determinism.v.beautified: common/Determinism.v lib/Coqlib.vo common/AST.vo lib/Integers.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo common/Behaviors.vo
+common/Subtyping.vo common/Subtyping.glob common/Subtyping.v.beautified: common/Subtyping.v lib/Coqlib.vo lib/Maps.vo common/Errors.vo
 backend/Cminor.vo backend/Cminor.glob backend/Cminor.v.beautified: backend/Cminor.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Events.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Smallstep.vo common/Switch.vo
 $(ARCH)/Op.vo $(ARCH)/Op.glob $(ARCH)/Op.v.beautified: $(ARCH)/Op.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo
 backend/CminorSel.vo backend/CminorSel.glob backend/CminorSel.v.beautified: backend/CminorSel.v lib/Coqlib.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Events.vo common/Values.vo common/Memory.vo backend/Cminor.vo $(ARCH)/Op.vo common/Globalenvs.vo common/Switch.vo common/Smallstep.vo
@@ -46,7 +47,7 @@ backend/Inliningspec.vo backend/Inliningspec.glob backend/Inliningspec.v.beautif
 backend/Inliningproof.vo backend/Inliningproof.glob backend/Inliningproof.v.beautified: backend/Inliningproof.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo lib/Integers.vo common/Values.vo common/Memory.vo common/Globalenvs.vo common/Events.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/Inlining.vo backend/Inliningspec.vo backend/RTL.vo
 backend/Renumber.vo backend/Renumber.glob backend/Renumber.v.beautified: backend/Renumber.v lib/Coqlib.vo lib/Maps.vo lib/Postorder.vo backend/RTL.vo
 backend/Renumberproof.vo backend/Renumberproof.glob backend/Renumberproof.v.beautified: backend/Renumberproof.v lib/Coqlib.vo lib/Maps.vo lib/Postorder.vo common/Events.vo common/Globalenvs.vo common/Smallstep.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Renumber.vo
-backend/RTLtyping.vo backend/RTLtyping.glob backend/RTLtyping.v.beautified: backend/RTLtyping.v lib/Coqlib.vo common/Errors.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo common/Globalenvs.vo common/Values.vo lib/Integers.vo common/Memory.vo common/Events.vo backend/RTL.vo backend/Conventions.vo
+backend/RTLtyping.vo backend/RTLtyping.glob backend/RTLtyping.v.beautified: backend/RTLtyping.v lib/Coqlib.vo common/Errors.vo common/Subtyping.vo lib/Maps.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo common/Globalenvs.vo common/Values.vo lib/Integers.vo common/Memory.vo common/Events.vo backend/RTL.vo backend/Conventions.vo
 backend/Kildall.vo backend/Kildall.glob backend/Kildall.v.beautified: backend/Kildall.v lib/Coqlib.vo lib/Iteration.vo lib/Maps.vo lib/Lattice.vo lib/Heaps.vo
 backend/Liveness.vo backend/Liveness.glob backend/Liveness.v.beautified: backend/Liveness.v lib/Coqlib.vo lib/Maps.vo lib/Lattice.vo common/AST.vo $(ARCH)/Op.vo backend/Registers.vo backend/RTL.vo backend/Kildall.vo
 $(ARCH)/ConstpropOp.vo $(ARCH)/ConstpropOp.glob $(ARCH)/ConstpropOp.v.beautified: $(ARCH)/ConstpropOp.v lib/Coqlib.vo common/AST.vo lib/Integers.vo lib/Floats.vo $(ARCH)/Op.vo backend/Registers.vo
diff --git a/Makefile b/Makefile
index d7a0fdd..7357e5e 100644
--- a/Makefile
+++ b/Makefile
@@ -66,7 +66,7 @@ LIB=Axioms.v Coqlib.v Intv.v Maps.v Heaps.v Lattice.v Ordered.v \
 # Parts common to the front-ends and the back-end (in common/)
 
 COMMON=Errors.v AST.v Events.v Globalenvs.v Memdata.v Memtype.v Memory.v \
-  Values.v Smallstep.v Behaviors.v Switch.v Determinism.v
+  Values.v Smallstep.v Behaviors.v Switch.v Determinism.v Subtyping.v
 
 # Back-end modules (in backend/, $(ARCH)/, $(ARCH)/$(VARIANT))
 
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index 1ff28d9..d160b2d 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -419,27 +419,33 @@ Definition indexed_memory_access
 Definition loadind_int (base: ireg) (ofs: int) (dst: ireg) (k: code) :=
   indexed_memory_access (fun base n => Pldr dst base (SAimm n)) mk_immed_mem_word base ofs k.
 
-Definition loadind_float (base: ireg) (ofs: int) (dst: freg) (k: code) :=
-  indexed_memory_access (Pfldd dst) mk_immed_mem_float base ofs k.
-
 Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
   match ty with
-  | Tint   => do r <- ireg_of dst; OK (loadind_int base ofs r k)
-  | Tfloat => do r <- freg_of dst; OK (loadind_float base ofs r k)
-  | Tlong => Error (msg "Asmgen.loadind")
+  | Tint =>
+      do r <- ireg_of dst; OK (loadind_int base ofs r k)
+  | Tfloat =>
+      do r <- freg_of dst;
+      OK (indexed_memory_access (Pfldd r) mk_immed_mem_float base ofs k)
+  | Tsingle =>
+      do r <- freg_of dst;
+      OK (indexed_memory_access (Pflds r) mk_immed_mem_float base ofs k)
+  | Tlong =>
+      Error (msg "Asmgen.loadind")
   end.
 
-Definition storeind_int (src: ireg) (base: ireg) (ofs: int) (k: code) :=
-  indexed_memory_access (fun base n => Pstr src base (SAimm n)) mk_immed_mem_word base ofs k.
-
-Definition storeind_float (src: freg) (base: ireg) (ofs: int) (k: code) :=
-  indexed_memory_access (Pfstd src) mk_immed_mem_float base ofs k.
-
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
   match ty with
-  | Tint   => do r <- ireg_of src; OK (storeind_int r base ofs k)
-  | Tfloat => do r <- freg_of src; OK (storeind_float r base ofs k)
-  | Tlong => Error (msg "Asmgen.storeind")
+  | Tint =>
+      do r <- ireg_of src;
+      OK (indexed_memory_access (fun base n => Pstr r base (SAimm n)) mk_immed_mem_word base ofs k)
+  | Tfloat =>
+      do r <- freg_of src;
+      OK (indexed_memory_access (Pfstd r) mk_immed_mem_float base ofs k)
+  | Tsingle =>
+      do r <- freg_of src;
+      OK (indexed_memory_access (Pfsts r) mk_immed_mem_float base ofs k)
+  | Tlong =>
+      Error (msg "Asmgen.storeind")
   end.
 
 (** Translation of memory accesses *)
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index 6899040..18f905a 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -239,15 +239,14 @@ Remark loadind_label:
 Proof.
   intros. destruct ty; monadInv H.
   unfold loadind_int; TailNoLabel.
-  unfold loadind_float; TailNoLabel.
+  TailNoLabel.
+  TailNoLabel.
 Qed.
 
 Remark storeind_label:
   forall base ofs ty src k c, storeind src base ofs ty k = OK c -> tail_nolabel k c.
 Proof.
-  intros. destruct ty; monadInv H.
-  unfold storeind_int; TailNoLabel.
-  unfold storeind_float; TailNoLabel.
+  intros. destruct ty; monadInv H; TailNoLabel.
 Qed.
 
 Remark transl_cond_label:
@@ -556,8 +555,10 @@ Proof.
   rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
   exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
   exists rs'; split. eauto.
-  split. change (Mach.undef_regs (destroyed_by_op Omove) rs) with rs. apply agree_exten with rs0; auto with asmgen.
+  split. eapply agree_undef_regs; eauto with asmgen.
   simpl; intros. rewrite Q; auto with asmgen.
+Local Transparent destroyed_by_setstack.
+  destruct ty; simpl; auto with asmgen.
 
 - (* Mgetparam *)
   assert (f0 = f) by congruence; subst f0.
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index e27ee80..2f5f868 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -405,6 +405,7 @@ Proof.
   split. Simpl. intros; Simpl.
 Qed.
 
+(*
 Lemma loadind_float_correct:
   forall (base: ireg) ofs dst (rs: regset) m v k,
   Mem.loadv Mfloat64al32 m (Val.add rs#base (Vint ofs)) = Some v ->
@@ -418,6 +419,7 @@ Proof.
   apply exec_straight_one. simpl. unfold exec_load. rewrite H0; rewrite H; eauto. auto.
   split. Simpl. intros; Simpl.
 Qed.
+*)
 
 Lemma loadind_correct:
   forall (base: ireg) ofs ty dst k c (rs: regset) m v,
@@ -430,12 +432,25 @@ Lemma loadind_correct:
 Proof.
   unfold loadind; intros.
   destruct ty; monadInv H.
+- (* int *)
   erewrite ireg_of_eq by eauto. apply loadind_int_correct; auto. 
-  erewrite freg_of_eq by eauto. apply loadind_float_correct; auto. 
+- (* float *)
+  erewrite freg_of_eq by eauto. simpl in H0.
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
+  split. Simpl. intros; Simpl.
+- (* single *)
+  erewrite freg_of_eq by eauto. simpl in H0.
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
+  split. Simpl. intros; Simpl.
 Qed.
 
 (** Indexed memory stores. *)
 
+(*
 Lemma storeind_int_correct:
   forall (base: ireg) ofs (src: ireg) (rs: regset) m m' k,
   Mem.storev Mint32 m (Val.add rs#base (Vint ofs)) (rs#src) = Some m' ->
@@ -464,6 +479,7 @@ Proof.
   rewrite H0. rewrite H1; auto with asmgen. rewrite H; eauto. auto.
   intros; Simpl.
 Qed.
+*)
 
 Lemma storeind_correct:
   forall (base: ireg) ofs ty src k c (rs: regset) m m',
@@ -471,13 +487,32 @@ Lemma storeind_correct:
   Mem.storev (chunk_of_type ty) m (Val.add rs#base (Vint ofs)) (rs#(preg_of src)) = Some m' ->
   exists rs',
      exec_straight ge fn c rs m k rs' m'
-  /\ forall r, r <> PC -> r <> IR14 -> rs'#r = rs#r.
+  /\ forall r, r <> PC -> r <> IR14 -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r.
 Proof.
   unfold storeind; intros.
-  destruct ty; monadInv H.
-  erewrite ireg_of_eq  in H0 by eauto. apply storeind_int_correct; auto.
+  destruct ty; monadInv H; simpl in H0.
+- (* int *)
+  erewrite ireg_of_eq  in H0 by eauto.
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_store. 
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
   assert (IR x <> IR IR14) by eauto with asmgen. congruence.
-  erewrite freg_of_eq in H0 by eauto. apply storeind_float_correct; auto. 
+  auto. intros; Simpl.
+- (* float *)
+  erewrite freg_of_eq  in H0 by eauto.
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_store. 
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
+  auto. intros; Simpl.
+- (* single *)
+  erewrite freg_of_eq  in H0 by eauto.
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_store. 
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
+  auto. intros; Simpl.
 Qed.
 
 (** Translation of shift immediates *)
diff --git a/arm/Machregs.v b/arm/Machregs.v
index 50535f0..d4439ef 100644
--- a/arm/Machregs.v
+++ b/arm/Machregs.v
@@ -109,6 +109,12 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   | _ => nil
   end.
 
+Definition destroyed_by_setstack (ty: typ): list mreg :=
+  match ty with
+  | Tsingle => F6 :: nil
+  | _ => nil
+  end.
+
 Definition destroyed_at_function_entry: list mreg :=
   R12 :: nil.
 
@@ -131,7 +137,7 @@ Definition mregs_for_builtin (ef: external_function): list (option mreg) * list(
 Global Opaque
     destroyed_by_op destroyed_by_load destroyed_by_store
     destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
-    destroyed_at_function_entry temp_for_parent_frame
+    destroyed_by_setstack destroyed_at_function_entry temp_for_parent_frame
     mregs_for_operation mregs_for_builtin.
 
 (** Two-address operations.  Return [true] if the first argument and
diff --git a/arm/Op.v b/arm/Op.v
index 2c84d90..a55c3f5 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -307,7 +307,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
   | Omove => (nil, Tint)   (* treated specially *)
   | Ointconst _ => (nil, Tint)
-  | Ofloatconst _ => (nil, Tfloat)
+  | Ofloatconst f => (nil, if Float.is_single_dec f then Tsingle else Tfloat)
   | Oaddrsymbol _ _ => (nil, Tint)
   | Oaddrstack _ => (nil, Tint)
   | Oadd => (Tint :: Tint :: nil, Tint)
@@ -345,7 +345,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
-  | Osingleoffloat => (Tfloat :: nil, Tfloat)
+  | Osingleoffloat => (Tfloat :: nil, Tsingle)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ointuoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
@@ -384,6 +384,7 @@ Proof with (try exact I).
   intros.
   destruct op; simpl; simpl in H0; FuncInv; try subst v...
   congruence.
+  destruct (Float.is_single_dec f); red; auto. 
   unfold symbol_address. destruct (Genv.find_symbol genv i)...
   destruct sp...
   destruct v0; destruct v1...
@@ -422,7 +423,7 @@ Proof with (try exact I).
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
-  destruct v0...
+  destruct v0... simpl. apply Float.singleoffloat_is_single.  
   destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); simpl in H2; inv H2...
   destruct v0; simpl in H0; inv H0. destruct (Float.intuoffloat f); simpl in H2; inv H2...
   destruct v0; simpl in H0; inv H0...
@@ -433,19 +434,6 @@ Proof with (try exact I).
   destruct (eval_condition c vl m)... destruct b...
 Qed.
 
-Lemma type_of_chunk_correct:
-  forall chunk m addr v,
-  Mem.loadv chunk m addr = Some v ->
-  Val.has_type v (type_of_chunk chunk).
-Proof.
-  intro chunk.
-  assert (forall v, Val.has_type (Val.load_result chunk v) (type_of_chunk chunk)).
-  destruct v; destruct chunk; exact I.
-  intros until v. unfold Mem.loadv. 
-  destruct addr; intros; try discriminate.
-  eapply Mem.load_type; eauto.
-Qed.
-
 End SOUNDNESS.
 
 (** * Manipulating and transforming operations *)
diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml
index cb3ce4d..97ed19a 100644
--- a/arm/PrintAsm.ml
+++ b/arm/PrintAsm.ml
@@ -457,22 +457,47 @@ let print_builtin_inline oc name args res =
 
 type direction = Incoming | Outgoing
 
+let ireg_param = function
+  | 0 -> IR0 | 1 -> IR1 | 2 -> IR2 | 3 -> IR3 | _ -> assert false
+
+let freg_param = function
+  | 0 -> FR0 | 1 -> FR1 | 2 -> FR2 | 3 -> FR3 | _ -> assert false
+
+let fixup_double oc dir f i1 i2 =
+  match dir with
+  | Incoming ->     (* f <- (i1, i2)  *)
+      fprintf oc "	fmdrr	%a, %a, %a\n" freg f ireg i1 ireg i2
+  | Outgoing ->     (* (i1, i2) <- f *)
+      fprintf oc "	fmrrd	%a, %a, %a\n" ireg i1 ireg i2 freg f
+
+let fixup_single oc dir f i =
+  match dir with
+  | Incoming ->     (* f <- i; f <- double_of_single f *)
+      fprintf oc "	fmsr	%a, %a\n" freg_single f ireg i;
+      fprintf oc "	fcvtds	%a, %a\n" freg f freg_single f
+  | Outgoing ->     (* f <- single_of_double f; i <- f *)
+      fprintf oc "	fcvtsd	%a, %a\n" freg_single f freg f;
+      fprintf oc "	fmrs	%a, %a\n" ireg i freg_single f
+
 let fixup_conventions oc dir tyl =
-  let fixup f i1 i2 =
-    match dir with
-    | Incoming ->     (* f <- (i1, i2)  *)
-        fprintf oc "	fmdrr	%a, %a, %a\n" freg f ireg i1 ireg i2
-    | Outgoing ->      (* (i1, i2) <- f *)
-        fprintf oc "	fmrrd	%a, %a, %a\n" ireg i1 ireg i2 freg f in
-  match tyl with
-  | Tfloat :: Tfloat :: _ ->
-      fixup FR0 IR0 IR1; fixup FR1 IR2 IR3; 4
-  | Tfloat :: _ ->
-      fixup FR0 IR0 IR1; 2
-  | Tint :: Tfloat :: _ | Tint :: Tint :: Tfloat :: _ ->
-      fixup FR1 IR2 IR3; 2
-  | _ ->
-      0
+  let rec fixup i tyl =
+    if i >= 4 then 0 else
+      match tyl with
+      | [] -> 0
+      | Tint :: tyl' ->
+          fixup (i+1) tyl'
+      | Tlong :: tyl' ->
+          fixup (((i + 1) land (-2)) + 2) tyl'
+      | Tfloat :: tyl' ->
+          let i = (i + 1) land (-2) in
+          if i >= 4 then 0 else begin
+            fixup_double oc dir (freg_param i) (ireg_param i) (ireg_param (i+1));
+            1 + fixup (i+2) tyl'
+          end
+      | Tsingle :: tyl' ->
+          fixup_single oc dir (freg_param i) (ireg_param i);
+          2 + fixup (i+1) tyl'
+  in fixup 0 tyl
 
 let fixup_arguments oc dir sg =
   fixup_conventions oc dir sg.sig_args
diff --git a/arm/eabi/Conventions1.v b/arm/eabi/Conventions1.v
index 1731eba..9fc9a50 100644
--- a/arm/eabi/Conventions1.v
+++ b/arm/eabi/Conventions1.v
@@ -236,7 +236,7 @@ Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => R0 :: nil
   | Some Tint => R0 :: nil
-  | Some Tfloat => F0 :: nil
+  | Some (Tfloat | Tsingle) => F0 :: nil
   | Some Tlong => R1 :: R0 :: nil
   end.
 
@@ -256,15 +256,23 @@ Qed.
 
 (** We use the following calling conventions, adapted from the ARM EABI:
 - The first 4 integer arguments are passed in registers [R0] to [R3].
-- The first 2 float arguments are passed in registers [F0] and [F1].
+- The first 2 float arguments are passed in registers [F0] and [F2].
+- The first 4 float arguments are passed in registers [F0] to [F3].
+- The first 2 integer arguments are passed in an aligned pair of two integer
+  registers.
 - Each float argument passed in a float register ``consumes'' an aligned pair
   of two integer registers.
-- Each long integer argument is passed in an aligned pair of two integer
-  registers.
+- Each single argument passed in a float register ``consumes'' an integer
+  register.
 - Extra arguments are passed on the stack, in [Outgoing] slots, consecutively
-  assigned (1 word for an integer argument, 2 words for a float or a long),
-  starting at word offset 0.
-*)
+  assigned (1 word for an integer or single argument, 2 words for a float
+  or a long), starting at word offset 0.
+
+This convention is not quite that of the ARM EABI, whereas every float
+argument are passed in one or two integer registers.  Unfortunately,
+this does not fit the data model of CompCert.  In [PrintAsm.ml]
+we insert additional code around function calls and returns that moves
+data appropriately. *)
 
 Definition ireg_param (n: Z) : mreg :=
   if zeq n (-4) then R0
@@ -273,7 +281,13 @@ Definition ireg_param (n: Z) : mreg :=
   else R3.
 
 Definition freg_param (n: Z) : mreg :=
-  if zeq n (-4) then F0 else F1.
+  if zeq n (-4) then F0 else F2.
+
+Definition sreg_param (n: Z) : mreg :=
+  if zeq n (-4) then F0
+  else if zeq n (-3) then F1
+  else if zeq n (-2) then F2
+  else F3.
 
 Fixpoint loc_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
@@ -285,6 +299,9 @@ Fixpoint loc_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
       let ofs := align ofs 2 in
       (if zle 0 ofs then S Outgoing ofs Tfloat else R (freg_param ofs))
       :: loc_arguments_rec tys (ofs + 2)
+  | Tsingle :: tys =>
+      (if zle 0 ofs then S Outgoing ofs Tsingle else R (sreg_param ofs))
+      :: loc_arguments_rec tys (ofs + 1)
   | Tlong :: tys =>
       let ofs := align ofs 2 in
       (if zle 0 ofs then S Outgoing (ofs + 1) Tint else R (ireg_param (ofs + 1)))
@@ -304,7 +321,7 @@ Definition loc_arguments (s: signature) : list loc :=
 Fixpoint size_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | Tint :: tys => size_arguments_rec tys (ofs + 1)
+  | (Tint | Tsingle) :: tys => size_arguments_rec tys (ofs + 1)
   | (Tfloat | Tlong) :: tys => size_arguments_rec tys (align ofs 2 + 2)
   end.
 
@@ -321,15 +338,6 @@ Definition loc_argument_acceptable (l: loc) : Prop :=
   | _ => False
   end.
 
-(*
-Lemma align_monotone:
-  forall x1 x2 y, y > 0 -> x1 <= x2 -> align x1 y <= align x2 y.
-Proof.
-  intros. unfold align. apply Zmult_le_compat_r. apply Z_div_le. 
-  omega. omega. omega.
-Qed.
-*)
-
 Remark ireg_param_caller_save:
   forall n, In (ireg_param n) destroyed_at_call.
 Proof.
@@ -345,6 +353,15 @@ Proof.
   unfold freg_param; intros. destruct (zeq n (-4)); simpl; OrEq.
 Qed.
 
+Remark sreg_param_caller_save:
+  forall n, In (sreg_param n) destroyed_at_call.
+Proof.
+  unfold sreg_param; intros.
+  destruct (zeq n (-4)). simpl; tauto. 
+  destruct (zeq n (-3)). simpl; tauto. 
+  destruct (zeq n (-2)); simpl; tauto.
+Qed.
+
 Remark loc_arguments_rec_charact:
   forall tyl ofs l,
   In l (loc_arguments_rec tyl ofs) ->
@@ -381,6 +398,12 @@ Proof.
   split. omega. split. omega. congruence.
   apply ireg_param_caller_save.
   exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+- (* Tsingle *)
+  destruct H.
+  subst l. destruct (zle 0 ofs).
+  split. omega. split. omega. congruence.
+  apply sreg_param_caller_save.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -409,6 +432,7 @@ Proof.
   apply Zle_trans with (align ofs 2 + 2); auto; omega.
   assert (ofs <= align ofs 2) by (apply align_le; omega).
   apply Zle_trans with (align ofs 2 + 2); auto; omega.
+  apply Zle_trans with (ofs + 1); auto; omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -448,6 +472,8 @@ Proof.
     destruct H1; auto.
     destruct (zle 0 (align ofs0 2)); inv H1.
     eapply Zle_trans. 2: apply H0. simpl typesize; omega. omega.
+  - (* Tsingle *)
+    destruct H1; auto. destruct (zle 0 ofs0); inv H1. apply H0. omega. 
   }
   unfold size_arguments. apply H1. auto.  
 Qed.
diff --git a/backend/Allocation.v b/backend/Allocation.v
index e4d0972..a4dd3af 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -608,6 +608,20 @@ Definition subst_loc (l1 l2: loc) (e: eqs) : option eqs :=
      (EqSet2.elements_between (select_loc_l l1) (select_loc_h l1) (eqs2 e))
      (Some e).
 
+(** [loc_type_compat env l e] checks that for all equations [r = l] in [e],
+  the type [env r] of [r] is compatible with the type of [l]. *)
+
+Definition sel_type (k: equation_kind) (ty: typ) : typ :=
+  match k with
+  | Full => ty
+  | Low | High => Tint
+  end.
+
+Definition loc_type_compat (env: regenv) (l: loc) (e: eqs) : bool :=
+  EqSet2.for_all_between
+    (fun q => subtype (sel_type (ekind q) (env (ereg q))) (Loc.type l))
+    (select_loc_l l) (select_loc_h l) (eqs2 e).
+
 (** [add_equations [r1...rN] [m1...mN] e] adds to [e] the [N] equations
     [ri = R mi [Full]].  Return [None] if the two lists have different lengths.
 *)
@@ -628,7 +642,7 @@ Function add_equations_args (rl: list reg) (tyl: list typ) (ll: list loc) (e: eq
   | nil, nil, nil => Some e
   | r1 :: rl, Tlong :: tyl, l1 :: l2 :: ll =>
       add_equations_args rl tyl ll (add_equation (Eq Low r1 l2) (add_equation (Eq High r1 l1) e))
-  | r1 :: rl, (Tint|Tfloat) :: tyl, l1 :: ll =>
+  | r1 :: rl, (Tint|Tfloat|Tsingle) :: tyl, l1 :: ll =>
       add_equations_args rl tyl ll (add_equation (Eq Full r1 l1) e)
   | _, _, _ => None
   end.
@@ -752,9 +766,16 @@ Definition ros_compatible_tailcall (ros: mreg + ident) : bool :=
 (** * The validator *)
 
 Definition destroyed_by_move (src dst: loc) :=
-  match src with
-  | S sl ofs ty => destroyed_by_getstack sl
-  | _ => destroyed_by_op Omove
+  match src, dst with
+  | S sl ofs ty, _ => destroyed_by_getstack sl
+  | _, S sl ofs ty => destroyed_by_setstack ty
+  | _, _ => destroyed_by_op Omove
+  end.
+
+Definition well_typed_move (env: regenv) (dst: loc) (e: eqs) : bool :=
+  match dst with
+  | R r => true
+  | S sl ofs ty => loc_type_compat env dst e
   end.
 
 (** Simulate the effect of a sequence of moves [mv] on a set of
@@ -763,14 +784,14 @@ Definition destroyed_by_move (src dst: loc) :=
   must hold before the sequence of moves.  Return [None] if the
   set of equations [e] cannot hold after the sequence of moves. *)
 
-Fixpoint track_moves (mv: moves) (e: eqs) : option eqs :=
+Fixpoint track_moves (env: regenv) (mv: moves) (e: eqs) : option eqs :=
   match mv with
   | nil => Some e
   | (src, dst) :: mv =>
-      do e1 <- track_moves mv e;
-      do e2 <- subst_loc dst src e1;
+      do e1 <- track_moves env mv e;
       assertion (can_undef_except dst (destroyed_by_move src dst)) e1;
-      Some e2
+      assertion (well_typed_move env dst e1);
+      subst_loc dst src e1
   end.
 
 (** [transfer_use_def args res args' res' undefs e] returns the set
@@ -802,89 +823,89 @@ Definition kind_second_word := if big_endian then Low else High.
   equations that must hold "before" these instructions, or [None] if
   impossible. *)
 
-Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option eqs :=
+Definition transfer_aux (f: RTL.function) (env: regenv) (shape: block_shape) (e: eqs) : option eqs :=
   match shape with
   | BSnop mv s =>
-      track_moves mv e
+      track_moves env mv e
   | BSmove src dst mv s =>
-      track_moves mv (subst_reg dst src e)
+      track_moves env mv (subst_reg dst src e)
   | BSmakelong src1 src2 dst mv s =>
       let e1 := subst_reg_kind dst High src1 Full e in
       let e2 := subst_reg_kind dst Low src2 Full e1 in
       assertion (reg_unconstrained dst e2);
-      track_moves mv e2
+      track_moves env mv e2
   | BSlowlong src dst mv s =>
       let e1 := subst_reg_kind dst Full src Low e in
       assertion (reg_unconstrained dst e1);
-      track_moves mv e1
+      track_moves env mv e1
   | BShighlong src dst mv s =>
       let e1 := subst_reg_kind dst Full src High e in
       assertion (reg_unconstrained dst e1);
-      track_moves mv e1
+      track_moves env mv e1
   | BSop op args res mv1 args' res' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args res args' res' (destroyed_by_op op) e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSopdead op args res mv s =>
       assertion (reg_unconstrained res e);
-      track_moves mv e
+      track_moves env mv e
   | BSload chunk addr args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- transfer_use_def args dst args' dst' (destroyed_by_load chunk addr) e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSload2 addr addr' args dst mv1 args1' dst1' mv2 args2' dst2' mv3 s =>
-      do e1 <- track_moves mv3 e;
+      do e1 <- track_moves env mv3 e;
       let e2 := remove_equation (Eq kind_second_word dst (R dst2')) e1 in
       assertion (loc_unconstrained (R dst2') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr') e2);
       do e3 <- add_equations args args2' e2;
-      do e4 <- track_moves mv2 e3;
+      do e4 <- track_moves env mv2 e3;
       let e5 := remove_equation (Eq kind_first_word dst (R dst1')) e4 in
       assertion (loc_unconstrained (R dst1') e5);
       assertion (can_undef (destroyed_by_load Mint32 addr) e5);
       assertion (reg_unconstrained dst e5);
       do e6 <- add_equations args args1' e5;
-      track_moves mv1 e6
+      track_moves env mv1 e6
   | BSload2_1 addr args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let e2 := remove_equation (Eq kind_first_word dst (R dst')) e1 in
       assertion (reg_loc_unconstrained dst (R dst') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr) e2);
       do e3 <- add_equations args args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSload2_2 addr addr' args dst mv1 args' dst' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let e2 := remove_equation (Eq kind_second_word dst (R dst')) e1 in
       assertion (reg_loc_unconstrained dst (R dst') e2);
       assertion (can_undef (destroyed_by_load Mint32 addr') e2);
       do e3 <- add_equations args args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSloaddead chunk addr args dst mv s =>
       assertion (reg_unconstrained dst e);
-      track_moves mv e
+      track_moves env mv e
   | BSstore chunk addr args src mv args' src' s =>
       assertion (can_undef (destroyed_by_store chunk addr) e);
       do e1 <- add_equations (src :: args) (src' :: args') e;
-      track_moves mv e1
+      track_moves env mv e1
   | BSstore2 addr addr' args src mv1 args1' src1' mv2 args2' src2' s =>
       assertion (can_undef (destroyed_by_store Mint32 addr') e);
       do e1 <- add_equations args args2' 
                   (add_equation (Eq kind_second_word src (R src2')) e);
-      do e2 <- track_moves mv2 e1;
+      do e2 <- track_moves env mv2 e1;
       assertion (can_undef (destroyed_by_store Mint32 addr) e2);
       do e3 <- add_equations args args1' 
                   (add_equation (Eq kind_first_word src (R src1')) e2);
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BScall sg ros args res mv1 ros' mv2 s =>
       let args' := loc_arguments sg in
       let res' := map R (loc_result sg) in
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       do e2 <- remove_equations_res res (sig_res sg) res' e1;
       assertion (forallb (fun l => reg_loc_unconstrained res l e2) res');
       assertion (no_caller_saves e2);
       do e3 <- add_equation_ros ros ros' e2;
       do e4 <- add_equations_args args (sig_args sg) args' e3;
-      track_moves mv1 e4
+      track_moves env mv1 e4
   | BStailcall sg ros args mv1 ros' =>
       let args' := loc_arguments sg in
       assertion (tailcall_is_possible sg);
@@ -892,9 +913,9 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
       assertion (ros_compatible_tailcall ros');
       do e1 <- add_equation_ros ros ros' empty_eqs;
       do e2 <- add_equations_args args (sig_args sg) args' e1;
-      track_moves mv1 e2
+      track_moves env mv1 e2
   | BSbuiltin ef args res mv1 args' res' mv2 s =>
-      do e1 <- track_moves mv2 e;
+      do e1 <- track_moves env mv2 e;
       let args' := map R args' in
       let res' := map R res' in
       do e2 <- remove_equations_res res (sig_res (ef_sig ef)) res' e1;
@@ -902,23 +923,23 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
       assertion (forallb (fun l => loc_unconstrained l e2) res');
       assertion (can_undef (destroyed_by_builtin ef) e2);
       do e3 <- add_equations_args args (sig_args (ef_sig ef)) args' e2;
-      track_moves mv1 e3
+      track_moves env mv1 e3
   | BSannot txt typ args res mv1 args' s =>
       do e1 <- add_equations_args args (annot_args_typ typ) args' e;
-      track_moves mv1 e1
+      track_moves env mv1 e1
   | BScond cond args mv args' s1 s2 =>
       assertion (can_undef (destroyed_by_cond cond) e);
       do e1 <- add_equations args args' e;
-      track_moves mv e1
+      track_moves env mv e1
   | BSjumptable arg mv arg' tbl =>
       assertion (can_undef destroyed_by_jumptable e);
-      track_moves mv (add_equation (Eq Full arg (R arg')) e)
+      track_moves env mv (add_equation (Eq Full arg (R arg')) e)
   | BSreturn None mv =>
-      track_moves mv empty_eqs
+      track_moves env mv empty_eqs
   | BSreturn (Some arg) mv =>
       let arg' := map R (loc_result (RTL.fn_sig f)) in
       do e1 <- add_equations_res arg (sig_res (RTL.fn_sig f)) arg' empty_eqs;
-      track_moves mv e1
+      track_moves env mv e1
   end.
 
 (** The main transfer function for the dataflow analysis.  Like [transfer_aux],
@@ -926,7 +947,7 @@ Definition transfer_aux (f: RTL.function) (shape: block_shape) (e: eqs) : option
   equations that must hold "after".  It also handles error propagation
   and reporting. *)
 
-Definition transfer (f: RTL.function) (shapes: PTree.t block_shape)
+Definition transfer (f: RTL.function) (env: regenv) (shapes: PTree.t block_shape)
                     (pc: node) (after: res eqs) : res eqs :=
   match after with
   | Error _ => after
@@ -934,7 +955,7 @@ Definition transfer (f: RTL.function) (shapes: PTree.t block_shape)
       match shapes!pc with
       | None => Error(MSG "At PC " :: POS pc :: MSG ": unmatched block" :: nil)
       | Some shape =>
-          match transfer_aux f shape e with
+          match transfer_aux f env shape e with
           | None => Error(MSG "At PC " :: POS pc :: MSG ": invalid register allocation" :: nil)
           | Some e' => OK e'
           end
@@ -1082,8 +1103,8 @@ Definition successors_block_shape (bsh: block_shape) : list node :=
   | BSreturn optarg mv => nil
   end.
 
-Definition analyze (f: RTL.function) (bsh: PTree.t block_shape) :=
-  DS.fixpoint (PTree.map1 successors_block_shape bsh) (transfer f bsh) nil.
+Definition analyze (f: RTL.function) (env: regenv) (bsh: PTree.t block_shape) :=
+  DS.fixpoint (PTree.map1 successors_block_shape bsh) (transfer f env bsh) nil.
 
 (** * Validating and translating functions and programs *)
 
@@ -1107,7 +1128,7 @@ Function compat_entry (rparams: list reg) (tys: list typ) (lparams: list loc) (e
   | nil, nil, nil => true
   | r1 :: rl, Tlong :: tyl, l1 :: l2 :: ll =>
       compat_left2 r1 l1 l2 e && compat_entry rl tyl ll e
-  | r1 :: rl, (Tint|Tfloat) :: tyl, l1 :: ll =>
+  | r1 :: rl, (Tint|Tfloat|Tsingle) :: tyl, l1 :: ll =>
       compat_left r1 l1 e && compat_entry rl tyl ll e
   | _, _, _ => false
   end.
@@ -1116,9 +1137,10 @@ Function compat_entry (rparams: list reg) (tys: list typ) (lparams: list loc) (e
   point.  We also check that the RTL and LTL functions agree in signature
   and stack size. *)
 
-Definition check_entrypoints_aux (rtl: RTL.function) (ltl: LTL.function) (e1: eqs) : option unit :=
+Definition check_entrypoints_aux (rtl: RTL.function) (ltl: LTL.function)
+                                 (env: regenv) (e1: eqs) : option unit :=
   do mv <- pair_entrypoints rtl ltl;
-  do e2 <- track_moves mv e1;
+  do e2 <- track_moves env mv e1;
   assertion (compat_entry (RTL.fn_params rtl)
                           (sig_args (RTL.fn_sig rtl))
                           (loc_parameters (RTL.fn_sig rtl)) e2);
@@ -1131,10 +1153,10 @@ Local Close Scope option_monad_scope.
 Local Open Scope error_monad_scope.
 
 Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
-                            (bsh: PTree.t block_shape)
-                            (a: PMap.t LEq.t): res unit :=
-  do e1 <- transfer rtl bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
-  match check_entrypoints_aux rtl ltl e1 with
+                             (env: regenv) (bsh: PTree.t block_shape)
+                             (a: PMap.t LEq.t): res unit :=
+  do e1 <- transfer rtl env bsh (RTL.fn_entrypoint rtl) a!!(RTL.fn_entrypoint rtl);
+  match check_entrypoints_aux rtl ltl env e1 with
   | None => Error (msg "invalid register allocation at entry point")
   | Some _ => OK tt
   end.
@@ -1143,11 +1165,11 @@ Definition check_entrypoints (rtl: RTL.function) (ltl: LTL.function)
   a source RTL function and an LTL function generated by the external
   register allocator. *)
 
-Definition check_function (rtl: RTL.function) (ltl: LTL.function) : res unit :=
+Definition check_function (rtl: RTL.function) (ltl: LTL.function) (env: regenv) : res unit :=
   let bsh := pair_codes rtl ltl in
-  match analyze rtl bsh with
+  match analyze rtl env bsh with
   | None => Error (msg "allocation analysis diverges")
-  | Some a => check_entrypoints rtl ltl bsh a
+  | Some a => check_entrypoints rtl ltl env bsh a
   end.
 
 (** [regalloc] is the external register allocator.  It is written in OCaml
@@ -1160,10 +1182,10 @@ Parameter regalloc: RTL.function -> res LTL.function.
 Definition transf_function (f: RTL.function) : res LTL.function :=
   match type_function f with
   | Error m => Error m
-  | OK tyenv =>
+  | OK env =>
       match regalloc f with
       | Error m => Error m
-      | OK tf => do x <- check_function f tf; OK tf
+      | OK tf => do x <- check_function f tf env; OK tf
       end
   end.
 
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index a4aa861..f05b05e 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -432,6 +432,7 @@ Proof.
   eapply add_equation_satisf. eapply add_equation_satisf. eauto. 
   eapply add_equation_satisf. eauto.
   eapply add_equation_satisf. eauto.
+  eapply add_equation_satisf. eauto.
   congruence.
 Qed.
 
@@ -464,6 +465,8 @@ Proof.
   eapply add_equation_lessdef with (q := Eq Full r1 l1). eapply add_equations_args_satisf; eauto.
 - destruct H1. constructor; auto. 
   eapply add_equation_lessdef with (q := Eq Full r1 l1). eapply add_equations_args_satisf; eauto.
+- destruct H1. constructor; auto. 
+  eapply add_equation_lessdef with (q := Eq Full r1 l1). eapply add_equations_args_satisf; eauto.
 - discriminate.
 Qed.
 
@@ -635,15 +638,16 @@ Proof.
 Qed.
 
 Lemma loc_unconstrained_satisf:
-  forall rs ls k r l e v,
+  forall rs ls k r mr e v,
+  let l := R mr in
   satisf rs ls (remove_equation (Eq k r l) e) ->
-  loc_unconstrained l (remove_equation (Eq k r l) e) = true ->
+  loc_unconstrained (R mr) (remove_equation (Eq k r l) e) = true ->
   Val.lessdef (sel_val k rs#r) v ->
   satisf rs (Locmap.set l v ls) e.
 Proof.
   intros; red; intros. 
   destruct (OrderedEquation.eq_dec q (Eq k r l)). 
-  subst q; simpl. rewrite Locmap.gss. auto.
+  subst q; simpl. unfold l; rewrite Locmap.gss_reg. auto.
   assert (EqSet.In q (remove_equation (Eq k r l) e)).
     simpl. ESD.fsetdec.  
   rewrite Locmap.gso. apply H; auto. eapply loc_unconstrained_sound; eauto. 
@@ -660,15 +664,16 @@ Proof.
 Qed.
 
 Lemma parallel_assignment_satisf:
-  forall k r l e rs ls v v',
+  forall k r mr e rs ls v v',
+  let l := R mr in
   Val.lessdef (sel_val k v) v' ->
-  reg_loc_unconstrained r l (remove_equation (Eq k r l) e) = true ->
+  reg_loc_unconstrained r (R mr) (remove_equation (Eq k r l) e) = true ->
   satisf rs ls (remove_equation (Eq k r l) e) ->
   satisf (rs#r <- v) (Locmap.set l v' ls) e.
 Proof.
   intros; red; intros.
   destruct (OrderedEquation.eq_dec q (Eq k r l)).
-  subst q; simpl. rewrite Regmap.gss; rewrite Locmap.gss; auto.
+  subst q; simpl. unfold l; rewrite Regmap.gss; rewrite Locmap.gss_reg; auto.
   assert (EqSet.In q (remove_equation {| ekind := k; ereg := r; eloc := l |} e)).
     simpl. ESD.fsetdec.
   exploit reg_loc_unconstrained_sound; eauto. intros [A B].
@@ -676,7 +681,8 @@ Proof.
 Qed.
 
 Lemma parallel_assignment_satisf_2:
-  forall rs ls res res' oty e e' v v',
+  forall rs ls res mres' oty e e' v v',
+  let res' := map R mres' in
   remove_equations_res res oty res' e = Some e' ->
   satisf rs ls e' ->
   reg_unconstrained res e' = true ->
@@ -685,17 +691,21 @@ Lemma parallel_assignment_satisf_2:
   satisf (rs#res <- v) (Locmap.setlist res' (encode_long oty v') ls) e.
 Proof.
   intros; red; intros.
+  assert (ISREG: forall l, In l res' -> exists mr, l = R mr).
+  { unfold res'; intros. exploit list_in_map_inv; eauto. intros [mr [A B]]. exists mr; auto. }
   functional inversion H.
 - (* Two 32-bit halves *)
-  subst.
+  subst. 
   set (e' := remove_equation {| ekind := Low; ereg := res; eloc := l2 |}
           (remove_equation {| ekind := High; ereg := res; eloc := l1 |} e)) in *.
-  simpl in H2. InvBooleans. simpl.
+  rewrite <- H5 in H2. simpl in H2. InvBooleans. simpl.
   destruct (OrderedEquation.eq_dec q (Eq Low res l2)).
-  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gss.
+  subst q; simpl. rewrite Regmap.gss.
+  destruct (ISREG l2) as [r2 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss_reg.
   apply Val.loword_lessdef; auto. 
   destruct (OrderedEquation.eq_dec q (Eq High res l1)).
-  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gso by auto. rewrite Locmap.gss.
+  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gso by auto.
+  destruct (ISREG l1) as [r1 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss_reg.
   apply Val.hiword_lessdef; auto.
   assert (EqSet.In q e'). unfold e', remove_equation; simpl; ESD.fsetdec. 
   rewrite Regmap.gso. rewrite ! Locmap.gso. auto. 
@@ -703,11 +713,13 @@ Proof.
   eapply loc_unconstrained_sound; eauto.
   eapply reg_unconstrained_sound; eauto.
 - (* One location *)
-  subst. simpl in H2. InvBooleans.
+  subst. rewrite <- H5 in H2. simpl in H2. InvBooleans.
   replace (encode_long oty v') with (v' :: nil).
   set (e' := remove_equation {| ekind := Full; ereg := res; eloc := l1 |} e) in *.
   destruct (OrderedEquation.eq_dec q (Eq Full res l1)). 
-  subst q; simpl. rewrite Regmap.gss. rewrite Locmap.gss. auto.
+  subst q; simpl. rewrite Regmap.gss.
+  destruct (ISREG l1) as [r1 EQ]. rewrite <- H5; auto with coqlib. rewrite EQ. rewrite Locmap.gss_reg.
+  auto.
   assert (EqSet.In q e'). unfold e', remove_equation; simpl. ESD.fsetdec. 
   simpl. rewrite Regmap.gso. rewrite Locmap.gso. auto.
   eapply loc_unconstrained_sound; eauto.
@@ -956,15 +968,70 @@ Proof.
   split. apply eqs_same; auto. auto. 
 Qed.
 
+Lemma loc_type_compat_charact:
+  forall env l e q,
+  loc_type_compat env l e = true ->
+  EqSet.In q e ->
+  subtype (sel_type (ekind q) (env (ereg q))) (Loc.type l) = true \/ Loc.diff l (eloc q).
+Proof.
+  unfold loc_type_compat; intros. 
+  rewrite EqSet2.for_all_between_iff in H.
+  destruct (select_loc_l l q) eqn: LL.
+  destruct (select_loc_h l q) eqn: LH.
+  left; apply H; auto. apply eqs_same; auto. 
+  right. apply select_loc_charact. auto. 
+  right. apply select_loc_charact. auto.
+  intros; subst; auto.
+  exact (select_loc_l_monotone l).
+  exact (select_loc_h_monotone l).
+Qed.
+
+Lemma well_typed_move_charact:
+  forall env l e k r rs,
+  well_typed_move env l e = true ->
+  EqSet.In (Eq k r l) e ->
+  wt_regset env rs ->
+  match l with
+  | R mr => True
+  | S sl ofs ty => Val.has_type (sel_val k rs#r) ty
+  end.
+Proof.
+  unfold well_typed_move; intros. 
+  destruct l as [mr | sl ofs ty]. 
+  auto.
+  exploit loc_type_compat_charact; eauto. intros [A | A].
+  simpl in A. eapply Val.has_subtype; eauto. 
+  generalize (H1 r). destruct k; simpl; intros.
+  auto.
+  destruct (rs#r); exact I.
+  destruct (rs#r); exact I.
+  eelim Loc.diff_not_eq. eexact A. auto.
+Qed.
+
+Remark val_lessdef_normalize:
+  forall v v' ty,
+  Val.has_type v ty -> Val.lessdef v v' ->
+  Val.lessdef v (Val.load_result (chunk_of_type ty) v').
+Proof.
+  intros. inv H0. rewrite Val.load_result_same; auto. auto. 
+Qed.
+
 Lemma subst_loc_satisf:
-  forall src dst rs ls e e',
+  forall env src dst rs ls e e',
   subst_loc dst src e = Some e' ->
+  well_typed_move env dst e = true ->
+  wt_regset env rs ->
   satisf rs ls e' ->
   satisf rs (Locmap.set dst (ls src) ls) e.
 Proof.
   intros; red; intros.
   exploit in_subst_loc; eauto. intros [[A B] | [A B]].
-  subst dst. rewrite Locmap.gss. apply (H0 _ B). 
+  subst dst. rewrite Locmap.gss.
+  destruct q as [k r l]; simpl in *.
+  exploit well_typed_move_charact; eauto.
+  destruct l as [mr | sl ofs ty]; intros.
+  apply (H2 _ B). 
+  apply val_lessdef_normalize; auto. apply (H2 _ B). 
   rewrite Locmap.gso; auto.
 Qed.
 
@@ -1011,15 +1078,22 @@ Proof.
 Qed.
 
 Lemma subst_loc_undef_satisf:
-  forall src dst rs ls ml e e',
+  forall env src dst rs ls ml e e',
   subst_loc dst src e = Some e' ->
+  well_typed_move env dst e = true ->
   can_undef_except dst ml e = true ->
+  wt_regset env rs ->
   satisf rs ls e' ->
   satisf rs (Locmap.set dst (ls src) (undef_regs ml ls)) e.
 Proof.
   intros; red; intros.
   exploit in_subst_loc; eauto. intros [[A B] | [A B]].
-  rewrite A. rewrite Locmap.gss. apply (H1 _ B). 
+  subst dst. rewrite Locmap.gss.
+  destruct q as [k r l]; simpl in *.
+  exploit well_typed_move_charact; eauto.
+  destruct l as [mr | sl ofs ty]; intros.
+  apply (H3 _ B). 
+  apply val_lessdef_normalize; auto. apply (H3 _ B). 
   rewrite Locmap.gso; auto. rewrite undef_regs_outside. eauto. 
   eapply can_undef_except_sound; eauto. apply Loc.diff_sym; auto.
 Qed.
@@ -1214,6 +1288,11 @@ Proof.
   red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r1). 
   exploit compat_left_sound; eauto. intros [A B]. rewrite A; rewrite B; auto.
   eapply IHb; eauto.
+- (* a param of type Tsingle *)
+  InvBooleans. simpl in H0. inv H0. simpl.
+  red; intros. rewrite Regmap.gsspec. destruct (peq (ereg q) r1). 
+  exploit compat_left_sound; eauto. intros [A B]. rewrite A; rewrite B; auto.
+  eapply IHb; eauto.
 - (* error case *)
   discriminate.
 Qed.
@@ -1256,29 +1335,29 @@ Qed.
 (** * Properties of the dataflow analysis *)
 
 Lemma analyze_successors:
-  forall f bsh an pc bs s e,
-  analyze f bsh = Some an ->
+  forall f env bsh an pc bs s e,
+  analyze f env bsh = Some an ->
   bsh!pc = Some bs ->
   In s (successors_block_shape bs) ->
   an!!pc = OK e ->
-  exists e', transfer f bsh s an!!s = OK e' /\ EqSet.Subset e' e.
+  exists e', transfer f env bsh s an!!s = OK e' /\ EqSet.Subset e' e.
 Proof.
   unfold analyze; intros. exploit DS.fixpoint_solution; eauto.
   instantiate (1 := pc). instantiate (1 := s). 
   unfold successors_list. rewrite PTree.gmap1. rewrite H0. simpl. auto. 
-  rewrite H2. unfold DS.L.ge. destruct (transfer f bsh s an#s); intros.
+  rewrite H2. unfold DS.L.ge. destruct (transfer f env bsh s an#s); intros.
   exists e0; auto. 
   contradiction.
 Qed.
 
 Lemma satisf_successors:
-  forall f bsh an pc bs s e rs ls,
-  analyze f bsh = Some an ->
+  forall f env bsh an pc bs s e rs ls,
+  analyze f env bsh = Some an ->
   bsh!pc = Some bs ->
   In s (successors_block_shape bs) ->
   an!!pc = OK e ->
   satisf rs ls e ->
-  exists e', transfer f bsh s an!!s = OK e' /\ satisf rs ls e'.
+  exists e', transfer f env bsh s an!!s = OK e' /\ satisf rs ls e'.
 Proof.
   intros. exploit analyze_successors; eauto. intros [e' [A B]]. 
   exists e'; split; auto. eapply satisf_incr; eauto.
@@ -1288,18 +1367,18 @@ Qed.
 
 Inductive transf_function_spec (f: RTL.function) (tf: LTL.function) : Prop :=
   | transf_function_spec_intro:  
-  forall tyenv an mv k e1 e2,
-  wt_function f tyenv ->
-  analyze f (pair_codes f tf) = Some an ->
-  (LTL.fn_code tf)!(LTL.fn_entrypoint tf) = Some(expand_moves mv (Lbranch (RTL.fn_entrypoint f) :: k)) ->
-  wf_moves mv ->
-  transfer f (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
-  track_moves mv e1 = Some e2 ->
-  compat_entry (RTL.fn_params f) (sig_args (RTL.fn_sig f)) (loc_parameters (fn_sig tf)) e2 = true ->
-  can_undef destroyed_at_function_entry e2 = true ->
-  RTL.fn_stacksize f = LTL.fn_stacksize tf ->
-  RTL.fn_sig f = LTL.fn_sig tf ->
-  transf_function_spec f tf.
+      forall env an mv k e1 e2,
+      wt_function f env ->
+      analyze f env (pair_codes f tf) = Some an ->
+      (LTL.fn_code tf)!(LTL.fn_entrypoint tf) = Some(expand_moves mv (Lbranch (RTL.fn_entrypoint f) :: k)) ->
+      wf_moves mv ->
+      transfer f env (pair_codes f tf) (RTL.fn_entrypoint f) an!!(RTL.fn_entrypoint f) = OK e1 ->
+      track_moves env mv e1 = Some e2 ->
+      compat_entry (RTL.fn_params f) (sig_args (RTL.fn_sig f)) (loc_parameters (fn_sig tf)) e2 = true ->
+      can_undef destroyed_at_function_entry e2 = true ->
+      RTL.fn_stacksize f = LTL.fn_stacksize tf ->
+      RTL.fn_sig f = LTL.fn_sig tf ->
+      transf_function_spec f tf.
 
 Lemma transf_function_inv:
   forall f tf,
@@ -1307,13 +1386,13 @@ Lemma transf_function_inv:
   transf_function_spec f tf.
 Proof.
   unfold transf_function; intros.
-  destruct (type_function f) as [tyenv|] eqn:TY; try discriminate.
+  destruct (type_function f) as [env|] eqn:TY; try discriminate.
   destruct (regalloc f); try discriminate.
-  destruct (check_function f f0) as [] eqn:?; inv H.
+  destruct (check_function f f0 env) as [] eqn:?; inv H.
   unfold check_function in Heqr. 
-  destruct (analyze f (pair_codes f tf)) as [an|] eqn:?; try discriminate.
+  destruct (analyze f env (pair_codes f tf)) as [an|] eqn:?; try discriminate.
   monadInv Heqr. 
-  destruct (check_entrypoints_aux f tf x) as [y|] eqn:?; try discriminate.
+  destruct (check_entrypoints_aux f tf env x) as [y|] eqn:?; try discriminate.
   unfold check_entrypoints_aux, pair_entrypoints in Heqo0. MonadInv.
   exploit extract_moves_sound; eauto. intros [A B]. subst b.
   exploit check_succ_sound; eauto. intros [k EQ1]. subst b0.
@@ -1321,21 +1400,24 @@ Proof.
 Qed.
 
 Lemma invert_code:
-  forall f tf pc i opte e,
+  forall f env tf pc i opte e,
+  wt_function f env ->
   (RTL.fn_code f)!pc = Some i ->
-  transfer f (pair_codes f tf) pc opte = OK e ->
+  transfer f env (pair_codes f tf) pc opte = OK e ->
   exists eafter, exists bsh, exists bb,
   opte = OK eafter /\ 
   (pair_codes f tf)!pc = Some bsh /\
   (LTL.fn_code tf)!pc = Some bb /\
   expand_block_shape bsh i bb /\
-  transfer_aux f bsh eafter = Some e.
+  transfer_aux f env bsh eafter = Some e /\
+  wt_instr f env i.
 Proof.
-  intros. destruct opte as [eafter|]; simpl in H0; try discriminate. exists eafter.
+  intros. destruct opte as [eafter|]; simpl in H1; try discriminate. exists eafter.
   destruct (pair_codes f tf)!pc as [bsh|] eqn:?; try discriminate. exists bsh. 
   exploit matching_instr_block; eauto. intros [bb [A B]].
-  destruct (transfer_aux f bsh eafter) as [e1|] eqn:?; inv H0. 
-  exists bb. auto.
+  destruct (transfer_aux f env bsh eafter) as [e1|] eqn:?; inv H1. 
+  exists bb. exploit wt_instr_at; eauto.
+  tauto. 
 Qed.
 
 (** * Semantic preservation *)
@@ -1409,10 +1491,11 @@ Proof.
 Qed.
 
 Lemma exec_moves:
-  forall mv rs s f sp bb m e e' ls,
-  track_moves mv e = Some e' ->
+  forall mv env rs s f sp bb m e e' ls,
+  track_moves env mv e = Some e' ->
   wf_moves mv ->
   satisf rs ls e' ->
+  wt_regset env rs ->
   exists ls',
     star step tge (Block s f sp (expand_moves mv bb) ls m)
                E0 (Block s f sp bb ls' m)
@@ -1424,7 +1507,7 @@ Opaque destroyed_by_op.
 - unfold expand_moves; simpl. inv H. exists ls; split. apply star_refl. auto.
   (* step *)
 - destruct a as [src dst]. unfold expand_moves. simpl. 
-  destruct (track_moves mv e) as [e1|] eqn:?; MonadInv.
+  destruct (track_moves env mv e) as [e1|] eqn:?; MonadInv.
   assert (wf_moves mv). red; intros. apply H0; auto with coqlib. 
   destruct src as [rsrc | ssrc]; destruct dst as [rdst | sdst].
   (* reg-reg *)
@@ -1450,16 +1533,17 @@ Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signa
       sg.(sig_res) = Some Tint ->
       match_stackframes nil nil sg
   | match_stackframes_cons:
-      forall res f sp pc rs s tf bb ls ts sg an e tyenv
+      forall res f sp pc rs s tf bb ls ts sg an e env
         (STACKS: match_stackframes s ts (fn_sig tf))
         (FUN: transf_function f = OK tf)
-        (ANL: analyze f (pair_codes f tf) = Some an)
-        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
-        (WTF: wt_function f tyenv)
-        (WTRS: wt_regset tyenv rs)
-        (WTRES: tyenv res = proj_sig_res sg)
+        (ANL: analyze f env (pair_codes f tf) = Some an)
+        (EQ: transfer f env (pair_codes f tf) pc an!!pc = OK e)
+        (WTF: wt_function f env)
+        (WTRS: wt_regset env rs)
+        (WTRES: subtype (proj_sig_res sg) (env res) = true)
         (STEPS: forall v ls1 m,
            Val.lessdef_list (encode_long (sig_res sg) v) (map ls1 (map R (loc_result sg))) ->
+           Val.has_type v (env res) ->
            agree_callee_save ls ls1 ->
            exists ls2,
            star LTL.step tge (Block ts tf sp bb ls1 m)
@@ -1472,15 +1556,15 @@ Inductive match_stackframes: list RTL.stackframe -> list LTL.stackframe -> signa
 
 Inductive match_states: RTL.state -> LTL.state -> Prop :=
   | match_states_intro:
-      forall s f sp pc rs m ts tf ls m' an e tyenv
+      forall s f sp pc rs m ts tf ls m' an e env
         (STACKS: match_stackframes s ts (fn_sig tf))
         (FUN: transf_function f = OK tf)
-        (ANL: analyze f (pair_codes f tf) = Some an)
-        (EQ: transfer f (pair_codes f tf) pc an!!pc = OK e)
+        (ANL: analyze f env (pair_codes f tf) = Some an)
+        (EQ: transfer f env (pair_codes f tf) pc an!!pc = OK e)
         (SAT: satisf rs ls e)
         (MEM: Mem.extends m m')
-        (WTF: wt_function f tyenv)
-        (WTRS: wt_regset tyenv rs),
+        (WTF: wt_function f env)
+        (WTRS: wt_regset env rs),
       match_states (RTL.State s f sp pc rs m)
                    (LTL.State ts tf sp pc ls m')
   | match_states_call:
@@ -1518,29 +1602,31 @@ Qed.
 
 Ltac UseShape :=
   match goal with
-  | [ CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ = OK _ |- _ ] =>
-      destruct (invert_code _ _ _ _ _ _ CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR); 
+  | [ WT: wt_function _ _, CODE: (RTL.fn_code _)!_ = Some _, EQ: transfer _ _ _ _ _ = OK _ |- _ ] =>
+      destruct (invert_code _ _ _ _ _ _ _ WT CODE EQ) as (eafter & bsh & bb & AFTER & BSH & TCODE & EBS & TR & WTI); 
       inv EBS; unfold transfer_aux in TR; MonadInv
   end.
 
-Ltac UseType :=
-  match goal with
-  | [ CODE: (RTL.fn_code _)!_ = Some _, WTF: wt_function _ _ |- _ ] =>
-      generalize (wt_instrs _ _ WTF _ _ CODE); intro WTI
-  end.
-
 Remark addressing_not_long:
   forall env f addr args dst s r,
-  wt_instr env f (Iload Mint64 addr args dst s) ->
+  wt_instr f env (Iload Mint64 addr args dst s) ->
   In r args -> r <> dst.
 Proof.
   intros. 
   assert (forall ty, In ty (type_of_addressing addr) -> ty = Tint).
   { intros. destruct addr; simpl in H1; intuition. }
-  inv H. 
+  inv H.
   assert (env r = Tint).
-  { apply H1. rewrite <- H5. apply in_map; auto. }
-  simpl in H8; congruence.
+  { generalize args (type_of_addressing addr) H0 H1 H5.
+    induction args0; simpl; intros.
+    contradiction.
+    destruct l. discriminate. InvBooleans. 
+    destruct H2. subst a. 
+    assert (t = Tint) by auto with coqlib. subst t. 
+    destruct (env r); auto || discriminate. 
+    eauto with coqlib. 
+  }
+  red; intros; subst r. rewrite H in H8; discriminate.
 Qed.
 
 (** The proof of semantic preservation is a simulation argument of the
@@ -1551,7 +1637,7 @@ Lemma step_simulation:
   forall S1', match_states S1 S1' ->
   exists S2', plus LTL.step tge S1' t S2' /\ match_states S2 S2'.
 Proof.
-  induction 1; intros S1' MS; inv MS; try UseType; try UseShape.
+  induction 1; intros S1' MS; inv MS; try UseShape.
 
 (* nop *)
   exploit exec_moves; eauto. intros [ls1 [X Y]]. 
@@ -1563,7 +1649,8 @@ Proof.
   econstructor; eauto. 
 
 (* op move *)
-- simpl in H0. inv H0.  
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  simpl in H0. inv H0.  
   exploit (exec_moves mv); eauto. intros [ls1 [X Y]]. 
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -1572,7 +1659,6 @@ Proof.
   exploit satisf_successors; eauto. simpl; eauto. eapply subst_reg_satisf; eauto. 
   intros [enext [U V]]. 
   econstructor; eauto.
-  inv WTI. apply wt_regset_assign; auto. rewrite <- H2. apply WTRS. congruence.
 
 (* op makelong *)
 - generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
@@ -1614,7 +1700,8 @@ Proof.
   econstructor; eauto.
 
 (* op regular *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- generalize (wt_exec_Iop _ _ _ _ _ _ _ _ _ _ _ WTI H0 WTRS). intros WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_operation_lessdef; eauto. intros [v' [F G]]. 
   exploit (exec_moves mv2); eauto. intros [ls2 [A2 B2]].
@@ -1627,7 +1714,6 @@ Proof.
   eauto. eauto. eauto. traceEq.
   exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]]. 
   econstructor; eauto.
-  eapply wt_exec_Iop; eauto. 
 
 (* op dead *)
 - exploit exec_moves; eauto. intros [ls1 [X Y]]. 
@@ -1642,7 +1728,8 @@ Proof.
   eapply wt_exec_Iop; eauto. 
 
 (* load regular *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit transfer_use_def_satisf; eauto. intros [X Y].
   exploit eval_addressing_lessdef; eauto. intros [a' [F G]].
   exploit Mem.loadv_extends; eauto. intros [v' [P Q]]. 
@@ -1656,10 +1743,10 @@ Proof.
   eauto. eauto. eauto. traceEq.
   exploit satisf_successors; eauto. simpl; eauto. intros [enext [U V]]. 
   econstructor; eauto.
-  eapply wt_exec_Iload; eauto.
 
 (* load pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
   set (v2' := if big_endian then v2 else v1) in *.
   set (v1' := if big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1710,10 +1797,10 @@ Proof.
   eauto. eauto. eauto. eauto. eauto. traceEq.
   exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. 
   econstructor; eauto.
-  eapply wt_exec_Iload; eauto.
 
 (* load first word of a pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
   set (v2' := if big_endian then v2 else v1) in *.
   set (v1' := if big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1741,10 +1828,10 @@ Proof.
   eauto. eauto. eauto. traceEq.
   exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. 
   econstructor; eauto.
-  eapply wt_exec_Iload; eauto.
 
 (* load second word of a pair *)
-- exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
+- generalize (wt_exec_Iload _ _ _ _ _ _ _ _ _ _ _ WTI H1 WTRS). intros WTRS'.
+  exploit Mem.loadv_int64_split; eauto. intros (v1 & v2 & LOAD1 & LOAD2 & V12).
   set (v2' := if big_endian then v2 else v1) in *.
   set (v1' := if big_endian then v1 else v2) in *.
   exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
@@ -1773,7 +1860,6 @@ Proof.
   eauto. eauto. eauto. traceEq.
   exploit satisf_successors; eauto. simpl; eauto. intros [enext [W Z]]. 
   econstructor; eauto.
-  eapply wt_exec_Iload; eauto.
 
 (* load dead *)
 - exploit exec_moves; eauto. intros [ls1 [X Y]]. 
@@ -1866,18 +1952,20 @@ Proof.
   econstructor; eauto.
   econstructor; eauto.
   inv WTI. rewrite SIG. auto. 
-  intros. exploit (exec_moves mv2); eauto.
+  intros. exploit (exec_moves mv2). eauto. eauto. 
   eapply function_return_satisf with (v := v) (ls_before := ls1) (ls_after := ls0); eauto.
   eapply add_equation_ros_satisf; eauto. 
   eapply add_equations_args_satisf; eauto. 
-  congruence. intros [ls2 [A2 B2]].
+  congruence.
+  apply wt_regset_assign; auto.
+  intros [ls2 [A2 B2]].
   exists ls2; split. 
   eapply star_right. eexact A2. constructor. traceEq.
   apply satisf_incr with eafter; auto. 
   rewrite SIG. eapply add_equations_args_lessdef; eauto.
-  inv WTI. rewrite <- H7. apply wt_regset_list; auto. 
+  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
   simpl. red; auto.
-  rewrite SIG. inv WTI. rewrite <- H7.  apply wt_regset_list; auto. 
+  inv WTI. rewrite SIG. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
 
 (* tailcall *)
 - set (sg := RTL.funsig fd) in *.
@@ -1898,15 +1986,16 @@ Proof.
   eapply match_stackframes_change_sig; eauto. rewrite SIG. rewrite e0. decEq.
   destruct (transf_function_inv _ _ FUN); auto.
   rewrite SIG. rewrite return_regs_arg_values; auto. eapply add_equations_args_lessdef; eauto.
-  inv WTI. rewrite <- H8. apply wt_regset_list; auto. 
+  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
   apply return_regs_agree_callee_save.
-  rewrite SIG. inv WTI. rewrite <- H8. apply wt_regset_list; auto. 
+  rewrite SIG. inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
 
 (* builtin *)
-- exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
+- assert (WTRS': wt_regset env (rs#res <- v)) by (eapply wt_exec_Ibuiltin; eauto).
+  exploit (exec_moves mv1); eauto. intros [ls1 [A1 B1]]. 
   exploit external_call_mem_extends; eauto.
   eapply add_equations_args_lessdef; eauto.
-  inv WTI. rewrite <- H4. apply wt_regset_list; auto. 
+  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
   intros [v' [m'' [F [G [J K]]]]].
   assert (E: map ls1 (map R args') = reglist ls1 args').
   { unfold reglist. rewrite list_map_compose. auto. }
@@ -1932,13 +2021,11 @@ Proof.
   exploit satisf_successors; eauto. simpl; eauto.
   intros [enext [U V]]. 
   econstructor; eauto.
-  inv WTI. apply wt_regset_assign; auto. rewrite H9. 
-  eapply external_call_well_typed; eauto. 
  
 (* annot *)
 - exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]]. 
   exploit external_call_mem_extends; eauto. eapply add_equations_args_lessdef; eauto.
-  inv WTI. simpl in H4. rewrite <- H4. apply wt_regset_list; auto. 
+  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
   intros [v' [m'' [F [G [J K]]]]].
   assert (v = Vundef). red in H0; inv H0. auto.
   econstructor; split.
@@ -1989,7 +2076,17 @@ Proof.
 (* return *)
 - destruct (transf_function_inv _ _ FUN). 
   exploit Mem.free_parallel_extends; eauto. rewrite H10. intros [m'' [P Q]].
-  destruct or as [r|]; MonadInv.
+  inv WTI; MonadInv.
+  (* without an argument *)
++ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
+  econstructor; split.
+  eapply plus_left. econstructor; eauto. 
+  eapply star_right. eexact A1.
+  econstructor. eauto. eauto. traceEq.
+  simpl. econstructor; eauto.
+  unfold encode_long, loc_result. rewrite <- H11; rewrite H13. simpl; auto.
+  apply return_regs_agree_callee_save.
+  constructor.
   (* with an argument *)
 + exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
   econstructor; split.
@@ -2003,26 +2100,20 @@ Proof.
   rewrite !list_map_compose. apply list_map_exten; intros.
   unfold return_regs. apply pred_dec_true. eapply loc_result_caller_save; eauto.
   apply return_regs_agree_callee_save.
-  inv WTI. simpl in H13. unfold proj_sig_res. rewrite <- H11; rewrite <- H13. apply WTRS. 
-  (* without an argument *)
-+ exploit (exec_moves mv); eauto. intros [ls1 [A1 B1]].
-  econstructor; split.
-  eapply plus_left. econstructor; eauto. 
-  eapply star_right. eexact A1.
-  econstructor. eauto. eauto. traceEq.
-  simpl. econstructor; eauto.
-  unfold encode_long, loc_result. destruct (sig_res (fn_sig tf)) as [[]|]; simpl; auto.
-  apply return_regs_agree_callee_save.
-  constructor.
+  unfold proj_sig_res. rewrite <- H11; rewrite H13. 
+  eapply Val.has_subtype; eauto. 
 
 (* internal function *)
 - monadInv FUN. simpl in *.
   destruct (transf_function_inv _ _ EQ). 
   exploit Mem.alloc_extends; eauto. apply Zle_refl. rewrite H8; apply Zle_refl. 
   intros [m'' [U V]].
+  assert (WTRS: wt_regset env (init_regs args (fn_params f))).
+  { apply wt_init_regs. inv H0. eapply Val.has_subtype_list; eauto. congruence. }
   exploit (exec_moves mv). eauto. eauto.
     eapply can_undef_satisf; eauto. eapply compat_entry_satisf; eauto. 
-    rewrite call_regs_param_values. rewrite H9. eexact ARGS. 
+    rewrite call_regs_param_values. rewrite H9. eexact ARGS.
+    exact WTRS.
   intros [ls1 [A B]].
   econstructor; split.
   eapply plus_left. econstructor; eauto. 
@@ -2031,7 +2122,6 @@ Proof.
   econstructor; eauto. 
   eauto. eauto. traceEq.
   econstructor; eauto.
-  apply wt_init_regs. inv H0. rewrite wt_params. congruence.
 
 (* external function *)
 - exploit external_call_mem_extends; eauto. intros [v' [m'' [F [G [J K]]]]].
@@ -2059,11 +2149,11 @@ Proof.
  
 (* return *)
 - inv STACKS.
-  exploit STEPS; eauto. intros [ls2 [A B]].
+  exploit STEPS; eauto. eapply Val.has_subtype; eauto. intros [ls2 [A B]].
   econstructor; split.
   eapply plus_left. constructor. eexact A. traceEq.
   econstructor; eauto.
-  apply wt_regset_assign; auto. congruence.  
+  apply wt_regset_assign; auto. eapply Val.has_subtype; eauto. 
 Qed.
 
 Lemma initial_states_simulation:
diff --git a/backend/CMtypecheck.ml b/backend/CMtypecheck.ml
index 1a19f71..1ca13ff 100644
--- a/backend/CMtypecheck.ml
+++ b/backend/CMtypecheck.ml
@@ -15,25 +15,31 @@
 
 (* A type-checker for Cminor *)
 
+(* FIXME: proper support for type Tsingle *)
+
 open Printf
 open Datatypes
 open Camlcoq
 open AST
+open PrintAST
 open Integers
 open Cminor
 
 exception Error of string
 
-let name_of_typ = function Tint -> "int" | Tfloat -> "float" | Tlong -> "long"
-
 type ty = Base of typ | Var of ty option ref
 
 let newvar () = Var (ref None)
 let tint = Base Tint
 let tfloat = Base Tfloat
 let tlong = Base Tlong
+let tsingle = Base Tsingle
 
-let ty_of_typ = function Tint -> tint | Tfloat -> tfloat | Tlong -> tlong
+let ty_of_typ = function
+  | Tint -> tint
+  | Tfloat -> tfloat
+  | Tlong -> tlong
+  | Tsingle -> tsingle
 
 let ty_of_sig_args tyl = List.map ty_of_typ tyl
 
@@ -47,7 +53,7 @@ let unify t1 t2 =
   | Base b1, Base b2 ->
       if b1 <> b2 then
         raise (Error (sprintf "Expected type %s, actual type %s\n"
-                              (name_of_typ b1) (name_of_typ b2)))
+                              (name_of_type b1) (name_of_type b2)))
   | Base b, Var r -> r := Some (Base b)
   | Var r, Base b -> r := Some (Base b)
   | Var r1, Var r2 -> r1 := Some (Var r2)
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 65f67ad..e0dbce8 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -661,7 +661,7 @@ Lemma add_store_satisfiable:
   numbering_satisfiable ge sp rs m n ->
   eval_addressing ge sp addr rs##args = Some a ->
   Mem.storev chunk m a (rs#src) = Some m' ->
-  Val.has_type (rs#src) (type_of_chunk chunk) ->
+  Val.has_type (rs#src) (type_of_chunk_use chunk) ->
   numbering_satisfiable ge sp rs m' (add_store n chunk addr args src).
 Proof.
   intros. unfold add_store. destruct H0 as [valu A].
@@ -916,7 +916,7 @@ Inductive match_stackframes: list stackframe -> list stackframe -> typ -> Prop :
            (ANALYZE: analyze f = Some approx)
            (WTF: wt_function f tyenv)
            (WTREGS: wt_regset tyenv rs)
-           (TYRES: tyenv res = ty)
+           (TYRES: subtype ty (tyenv res) = true)
            (SAT: forall v m, numbering_satisfiable ge sp (rs#res <- v) m approx!!pc)
            (STACKS: match_stackframes s s' (proj_sig_res (fn_sig f))),
     match_stackframes
@@ -998,7 +998,7 @@ Proof.
   rewrite <- RES. apply eval_operation_preserved. exact symbols_preserved.
   (* state matching *)
   econstructor; eauto.
-  eapply wt_exec_Iop; eauto. eapply wt_instrs; eauto.  
+  eapply wt_exec_Iop; eauto. eapply wt_instr_at; eauto.  
   eapply analysis_correct_1; eauto. simpl; auto.
   unfold transfer; rewrite H. 
   eapply add_op_satisfiable; eauto. eapply wf_analyze; eauto.
@@ -1025,7 +1025,7 @@ Proof.
   eapply exec_Iload; eauto.
   (* state matching *)
   econstructor; eauto.
-  eapply wt_exec_Iload; eauto. eapply wt_instrs; eauto.
+  eapply wt_exec_Iload; eauto. eapply wt_instr_at; eauto.
   eapply analysis_correct_1; eauto. simpl; auto. 
   unfold transfer; rewrite H. 
   eapply add_load_satisfiable; eauto. eapply wf_analyze; eauto.
@@ -1048,42 +1048,40 @@ Proof.
   eapply analysis_correct_1; eauto. simpl; auto. 
   unfold transfer; rewrite H.
   eapply add_store_satisfiable; eauto. eapply wf_analyze; eauto.
-  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
-  rewrite <- H8. auto.
+  exploit wt_instr_at; eauto. intros WTI; inv WTI.
+  eapply Val.has_subtype; eauto. 
 
   (* Icall *)
   exploit find_function_translated; eauto. intros [tf [FIND' TRANSF']]. 
   econstructor; split.
   eapply exec_Icall; eauto.
   apply sig_preserved; auto.
-  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
+  exploit wt_instr_at; eauto. intros WTI; inv WTI.
   econstructor; eauto. 
   econstructor; eauto. 
   intros. eapply analysis_correct_1; eauto. simpl; auto. 
   unfold transfer; rewrite H. 
   apply empty_numbering_satisfiable.
-  rewrite <- H7. apply wt_regset_list; auto. 
+  eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto. 
 
   (* Itailcall *)
   exploit find_function_translated; eauto. intros [tf [FIND' TRANSF']]. 
   econstructor; split.
   eapply exec_Itailcall; eauto.
   apply sig_preserved; auto.
-  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
+  exploit wt_instr_at; eauto. intros WTI; inv WTI.
   econstructor; eauto. 
   replace (proj_sig_res (funsig fd)) with (proj_sig_res (fn_sig f)). auto. 
   unfold proj_sig_res. rewrite H6; auto.
-  rewrite <- H7. apply wt_regset_list; auto.
+  eapply Val.has_subtype_list; eauto. apply wt_regset_list; auto.
 
   (* Ibuiltin *)
   econstructor; split.
   eapply exec_Ibuiltin; eauto. 
   eapply external_call_symbols_preserved; eauto.
   exact symbols_preserved. exact varinfo_preserved.
-  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
   econstructor; eauto.
-  apply wt_regset_assign; auto. 
-  rewrite H6. eapply external_call_well_typed; eauto. 
+  eapply wt_exec_Ibuiltin; eauto. eapply wt_instr_at; eauto.
   eapply analysis_correct_1; eauto. simpl; auto.
   unfold transfer; rewrite H.
   assert (CASE1: numbering_satisfiable ge sp (rs#res <- v) m' empty_numbering).
@@ -1124,8 +1122,9 @@ Proof.
   econstructor; split.
   eapply exec_Ireturn; eauto.
   econstructor; eauto.
-  generalize (wt_instrs _ _ WTF pc _ H); intro WTI; inv WTI.
-  unfold proj_sig_res. rewrite <- H2. destruct or; simpl; auto. 
+  exploit wt_instr_at; eauto. intros WTI; inv WTI; simpl.
+  auto.
+  unfold proj_sig_res; rewrite H2. eapply Val.has_subtype; eauto.
 
   (* internal function *)
   monadInv H7. unfold transf_function in EQ. 
@@ -1135,7 +1134,7 @@ Proof.
   econstructor; split.
   eapply exec_function_internal; eauto. 
   simpl. econstructor; eauto.
-  apply wt_init_regs. inv WTF. rewrite wt_params; auto.
+  apply wt_init_regs. inv WTF. eapply Val.has_subtype_list; eauto.
   apply analysis_correct_entry; auto.
 
   (* external function *)
@@ -1152,7 +1151,7 @@ Proof.
   econstructor; split.
   eapply exec_return; eauto.
   econstructor; eauto.
-  apply wt_regset_assign; auto. 
+  apply wt_regset_assign; auto. eapply Val.has_subtype; eauto.  
 Qed.
 
 Lemma transf_initial_states:
diff --git a/backend/IRC.ml b/backend/IRC.ml
index 573c3d7..79d66b9 100644
--- a/backend/IRC.ml
+++ b/backend/IRC.ml
@@ -245,7 +245,13 @@ type graph = {
 
 let num_register_classes = 2
 
-let class_of_type = function Tint -> 0 | Tfloat -> 1 | Tlong -> assert false
+let class_of_type = function
+  | Tint -> 0
+  | Tfloat | Tsingle -> 1
+  | Tlong -> assert false
+
+let type_of_class c =
+  if c = 0 then Tint else Tfloat
 
 let reserved_registers = ref ([]: mreg list)
 
@@ -860,7 +866,7 @@ let assign_color g n =
           n.color <- Some loc
       | None ->
           (* Last, pick a Local stack slot *)
-          n.color <- Some (find_slot slot_conflicts n.typ)
+          n.color <- Some (find_slot slot_conflicts (type_of_class n.regclass))
 
 (* Extract the location of a variable *)
 
@@ -877,7 +883,7 @@ let location_of_var g v =
       with Not_found ->
         match ty with
         | Tint -> R dummy_int_reg
-        | Tfloat -> R dummy_float_reg
+        | Tfloat | Tsingle -> R dummy_float_reg
         | Tlong -> assert false
 
 (* The exported interface *)
diff --git a/backend/LTL.v b/backend/LTL.v
index 27fee8a..d1db2c5 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -172,7 +172,7 @@ Fixpoint undef_regs (rl: list mreg) (rs: locset) : locset :=
   | r1 :: rl => Locmap.set (R r1) Vundef (undef_regs rl rs)
   end.
 
-Definition destroyed_by_getstack (s: slot) : list mreg :=
+Definition destroyed_by_getstack (s: slot): list mreg :=
   match s with
   | Incoming => temp_for_parent_frame :: nil
   | _        => nil
@@ -218,7 +218,7 @@ Inductive step: state -> trace -> state -> Prop :=
       step (Block s f sp (Lgetstack sl ofs ty dst :: bb) rs m)
         E0 (Block s f sp bb rs' m)
   | exec_Lsetstack: forall s f sp src sl ofs ty bb rs m rs',
-      rs' = Locmap.set (S sl ofs ty) (rs (R src)) (undef_regs (destroyed_by_op Omove) rs) ->
+      rs' = Locmap.set (S sl ofs ty) (rs (R src)) (undef_regs (destroyed_by_setstack ty) rs) ->
       step (Block s f sp (Lsetstack src sl ofs ty :: bb) rs m)
         E0 (Block s f sp bb rs' m)
   | exec_Lstore: forall s f sp chunk addr args src bb rs m a rs' m',
diff --git a/backend/Linear.v b/backend/Linear.v
index bdb08b4..3c52436 100644
--- a/backend/Linear.v
+++ b/backend/Linear.v
@@ -160,7 +160,7 @@ Inductive step: state -> trace -> state -> Prop :=
         E0 (State s f sp b rs' m)
   | exec_Lsetstack:
       forall s f sp src sl ofs ty b rs m rs',
-      rs' = Locmap.set (S sl ofs ty) (rs (R src)) (undef_regs (destroyed_by_op Omove) rs) ->
+      rs' = Locmap.set (S sl ofs ty) (rs (R src)) (undef_regs (destroyed_by_setstack ty) rs) ->
       step (State s f sp (Lsetstack src sl ofs ty :: b) rs m)
         E0 (State s f sp b rs' m)
   | exec_Lop:
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index c51db6f..73c5453 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -45,7 +45,7 @@ Definition slot_valid (sl: slot) (ofs: Z) (ty: typ): bool :=
   | Incoming => In_dec Loc.eq (S Incoming ofs ty) (loc_parameters funct.(fn_sig))
   end &&
   match ty with
-  | Tint | Tfloat => true
+  | Tint | Tfloat | Tsingle => true
   | Tlong => false
   end.
 
@@ -66,23 +66,23 @@ Definition loc_valid (l: loc) : bool :=
 Definition wt_instr (i: instruction) : bool :=
   match i with
   | Lgetstack sl ofs ty r =>
-      typ_eq ty (mreg_type r) && slot_valid sl ofs ty
+      subtype ty (mreg_type r) && slot_valid sl ofs ty
   | Lsetstack r sl ofs ty =>
-      typ_eq ty (mreg_type r) && slot_valid sl ofs ty && slot_writable sl
+      slot_valid sl ofs ty && slot_writable sl
   | Lop op args res =>
       match is_move_operation op args with
       | Some arg =>
-          typ_eq (mreg_type arg) (mreg_type res)
+          subtype (mreg_type arg) (mreg_type res)
       | None => 
           let (targs, tres) := type_of_operation op in
-          typ_eq (mreg_type res) tres
+          subtype tres (mreg_type res)
       end
   | Lload chunk addr args dst =>
-      typ_eq (mreg_type dst) (type_of_chunk chunk)
+      subtype (type_of_chunk chunk) (mreg_type dst)
   | Ltailcall sg ros =>
       zeq (size_arguments sg) 0
   | Lbuiltin ef args res =>
-      list_typ_eq (map mreg_type res) (proj_sig_res' (ef_sig ef))
+      subtype_list (proj_sig_res' (ef_sig ef)) (map mreg_type res) 
   | Lannot ef args =>
       forallb loc_valid args
   | _ =>
@@ -104,28 +104,35 @@ Require Import Values.
 Definition wt_locset (ls: locset) : Prop :=
   forall l, Val.has_type (ls l) (Loc.type l).
 
-Lemma wt_setloc:
-  forall ls l v,
-  Val.has_type v (Loc.type l) -> wt_locset ls -> wt_locset (Locmap.set l v ls).
+Lemma wt_setreg:
+  forall ls r v,
+  Val.has_type v (mreg_type r) -> wt_locset ls -> wt_locset (Locmap.set (R r) v ls).
 Proof.
   intros; red; intros.
   unfold Locmap.set. 
-  destruct (Loc.eq l l0). congruence.
-  destruct (Loc.diff_dec l l0). auto. red. auto.
+  destruct (Loc.eq (R r) l).
+  subst l; auto.
+  destruct (Loc.diff_dec (R r) l). auto. red. auto.
 Qed.
 
-Lemma wt_setlocs:
-  forall ll vl ls,
-  Val.has_type_list vl (map Loc.type ll) -> wt_locset ls -> wt_locset (Locmap.setlist ll vl ls).
+Lemma wt_setstack:
+  forall ls sl ofs ty v,
+  wt_locset ls -> wt_locset (Locmap.set (S sl ofs ty) v ls).
 Proof.
-  induction ll; destruct vl; simpl; intuition. 
-  apply IHll; auto. apply wt_setloc; auto.
+  intros; red; intros.
+  unfold Locmap.set. 
+  destruct (Loc.eq (S sl ofs ty) l).
+  subst l. simpl. 
+  generalize (Val.load_result_type (chunk_of_type ty) v). 
+  replace (type_of_chunk (chunk_of_type ty)) with ty. auto.
+  destruct ty; reflexivity.
+  destruct (Loc.diff_dec (S sl ofs ty) l). auto. red. auto.
 Qed.
 
 Lemma wt_undef_regs:
   forall rs ls, wt_locset ls -> wt_locset (undef_regs rs ls).
 Proof.
-  induction rs; simpl; intros. auto. apply wt_setloc; auto. red; auto.
+  induction rs; simpl; intros. auto. apply wt_setreg; auto. red; auto.
 Qed.
 
 Lemma wt_call_regs:
@@ -161,10 +168,11 @@ Lemma wt_setlist_result:
 Proof.
   unfold loc_result, encode_long, proj_sig_res; intros.
   destruct (sig_res sg) as [[] | ]; simpl.
-  apply wt_setloc; auto.
-  apply wt_setloc; auto.
-  destruct v; simpl in H; try contradiction;
-  simpl; apply wt_setloc; auto; apply wt_setloc; auto.
-  apply wt_setloc; auto.
+- apply wt_setreg; auto.
+- apply wt_setreg; auto.
+- destruct v; simpl in H; try contradiction;
+  simpl; apply wt_setreg; auto; apply wt_setreg; auto.
+- apply wt_setreg; auto. apply Val.has_subtype with Tsingle; auto. 
+- apply wt_setreg; auto.
 Qed.
 
diff --git a/backend/Locations.v b/backend/Locations.v
index f7fb607..43ce210 100644
--- a/backend/Locations.v
+++ b/backend/Locations.v
@@ -69,7 +69,7 @@ Defined.
 Open Scope Z_scope.
 
 Definition typesize (ty: typ) : Z :=
-  match ty with Tint => 1 | Tlong => 2 | Tfloat => 2 end.
+  match ty with Tint => 1 | Tlong => 2 | Tfloat => 2 | Tsingle => 1 end.
 
 Lemma typesize_pos:
   forall (ty: typ), typesize ty > 0.
@@ -292,17 +292,38 @@ Module Locmap.
       maps location [l] to value [v], locations that overlap with [l]
       to [Vundef], and locations that are different (and non-overlapping)
       from [l] to their previous values in [m].  This is apparent in the
-      ``good variables'' properties [Locmap.gss] and [Locmap.gso]. *)
+      ``good variables'' properties [Locmap.gss] and [Locmap.gso].
+
+      Additionally, the [set] operation also anticipates the fact that
+      abstract stack slots are mapped to concrete memory locations
+      in the [Stacking] phase.  Hence, values stored in stack slots
+      are normalized according to the type of the slot. *)
 
   Definition set (l: loc) (v: val) (m: t) : t :=
     fun (p: loc) =>
-      if Loc.eq l p then v else if Loc.diff_dec l p then m p else Vundef.
+      if Loc.eq l p then
+        match l with R r => v | S sl ofs ty => Val.load_result (chunk_of_type ty) v end
+      else if Loc.diff_dec l p then
+        m p
+      else Vundef.
+
+  Lemma gss: forall l v m,
+    (set l v m) l = 
+    match l with R r => v | S sl ofs ty => Val.load_result (chunk_of_type ty) v end.
+  Proof.
+    intros. unfold set. apply dec_eq_true.
+  Qed.
 
-  Lemma gss: forall l v m, (set l v m) l = v.
+  Lemma gss_reg: forall r v m, (set (R r) v m) (R r) = v.
   Proof.
     intros. unfold set. rewrite dec_eq_true. auto.
   Qed.
 
+  Lemma gss_typed: forall l v m, Val.has_type v (Loc.type l) -> (set l v m) l = v.
+  Proof.
+    intros. rewrite gss. destruct l. auto. apply Val.load_result_same; auto. 
+  Qed.
+
   Lemma gso: forall l v m p, Loc.diff l p -> (set l v m) p = m p.
   Proof.
     intros. unfold set. destruct (Loc.eq l p).
@@ -328,10 +349,11 @@ Module Locmap.
   Proof.
     assert (P: forall ll l m, m l = Vundef -> (undef ll m) l = Vundef).
       induction ll; simpl; intros. auto. apply IHll. 
-      unfold set. destruct (Loc.eq a l); auto. 
+      unfold set. destruct (Loc.eq a l).
+      destruct a. auto. destruct ty; reflexivity. 
       destruct (Loc.diff_dec a l); auto.
     induction ll; simpl; intros. contradiction. 
-    destruct H. apply P. subst a. apply gss. 
+    destruct H. apply P. subst a. apply gss_typed. exact I. 
     auto.
   Qed.
 
@@ -355,7 +377,12 @@ End Locmap.
 Module IndexedTyp <: INDEXED_TYPE.
   Definition t := typ.
   Definition index (x: t) :=
-    match x with Tint => 1%positive | Tfloat => 2%positive | Tlong => 3%positive end.
+    match x with
+    | Tint => 1%positive
+    | Tsingle => 2%positive
+    | Tfloat => 3%positive
+    | Tlong => 4%positive
+    end.
   Lemma index_inj: forall x y, index x = index y -> x = y.
   Proof. destruct x; destruct y; simpl; congruence. Qed.
   Definition eq := typ_eq.
@@ -467,9 +494,9 @@ Module OrderedLoc <: OrderedType.
         destruct H. right.
         destruct H0. right. generalize (RANGE ty'); omega. 
         destruct H0. 
-        assert (ty' = Tint). 
-        { unfold OrderedTyp.lt in H1. destruct ty'; compute in H1; congruence. }
-        subst ty'. right. simpl typesize. omega. 
+        assert (ty' = Tint \/ ty' = Tsingle). 
+        { unfold OrderedTyp.lt in H1. destruct ty'; auto; compute in H1; congruence. }
+        right. destruct H2; subst ty'; simpl typesize; omega. 
       + destruct H. left. apply OrderedSlot.lt_not_eq; auto.
         destruct H. right.
         destruct H0. left; omega.
@@ -494,6 +521,8 @@ Module OrderedLoc <: OrderedType.
       right; split. omega. compute. auto. 
       left; omega. 
       left; omega.
+      destruct (zlt ofs' (ofs - 1)). left; auto. 
+      right; split. omega. compute. auto. 
   Qed.
 
 End OrderedLoc.
diff --git a/backend/Mach.v b/backend/Mach.v
index ec48195..316e788 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -296,7 +296,7 @@ Inductive step: state -> trace -> state -> Prop :=
   | exec_Msetstack:
       forall s f sp src ofs ty c rs m m' rs',
       store_stack m sp ty ofs (rs src) = Some m' ->
-      rs' = undef_regs (destroyed_by_op Omove) rs ->
+      rs' = undef_regs (destroyed_by_setstack ty) rs ->
       step (State s f sp (Msetstack src ofs ty :: c) rs m)
         E0 (State s f sp c rs' m')
   | exec_Mgetparam:
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index 8612b73..4fbc600 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -168,6 +168,7 @@ let name_of_type = function
   | Tint -> "int"
   | Tfloat -> "float"
   | Tlong -> "long"
+  | Tsingle -> "single"
 
 let rec print_sig p = function
   | {sig_args = []; sig_res = None} -> fprintf p "void"
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
index 756fc58..258baba 100644
--- a/backend/PrintXTL.ml
+++ b/backend/PrintXTL.ml
@@ -29,7 +29,11 @@ let mreg pp r =
   | Some s -> fprintf pp "%s" s
   | None -> fprintf pp "<unknown machreg>"
 
-let short_name_of_type = function Tint -> 'i' | Tfloat -> 'f' | Tlong -> 'l'
+let short_name_of_type = function
+  | Tint -> 'i'
+  | Tfloat -> 'f'
+  | Tlong -> 'l'
+  | Tsingle -> 's'
 
 let loc pp = function
   | R r -> mreg pp r
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index 7ed7344..b4702cf 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -12,9 +12,9 @@
 
 (** Typing rules and a type inference algorithm for RTL. *)
 
-Require Import Recdef.
 Require Import Coqlib.
 Require Import Errors.
+Require Import Subtyping.
 Require Import Maps.
 Require Import AST.
 Require Import Op.
@@ -32,9 +32,11 @@ Require Import Conventions.
 (** Like Cminor and all intermediate languages, RTL can be equipped with
   a simple type system that statically guarantees that operations
   and addressing modes are applied to the right number of arguments
-  and that the arguments are of the correct types.  The type algebra
-  is trivial, consisting of the two types [Tint] (for integers and pointers)
-  and [Tfloat] (for floats).  
+  and that the arguments are of the correct types.   The type algebra
+  is very simple, consisting of the four types [Tint] (for integers
+  and pointers), [Tfloat] (for double-precision floats), [Tlong]
+  (for 64-bit integers) and [Tsingle] (for single-precision floats).
+  The only subtlety is that [Tsingle] is a subtype of [Tfloat].
 
   Additionally, we impose that each pseudo-register has the same type
   throughout the function.  This requirement helps with register allocation,
@@ -58,8 +60,8 @@ Definition regenv := reg -> typ.
 
 Section WT_INSTR.
 
-Variable env: regenv.
 Variable funct: function.
+Variable env: regenv.
 
 Definition valid_successor (s: node) : Prop :=
   exists i, funct.(fn_code)!s = Some i.
@@ -71,63 +73,68 @@ Inductive wt_instr : instruction -> Prop :=
       wt_instr (Inop s)
   | wt_Iopmove:
       forall r1 r s,
-      env r1 = env r ->
+      subtype (env r1) (env r) = true ->
       valid_successor s ->
       wt_instr (Iop Omove (r1 :: nil) r s)
   | wt_Iop:
       forall op args res s,
       op <> Omove ->
-      (List.map env args, env res) = type_of_operation op ->
+      subtype_list (map env args) (fst (type_of_operation op)) = true ->
+      subtype (snd (type_of_operation op)) (env res) = true ->
       valid_successor s ->
       wt_instr (Iop op args res s)
   | wt_Iload:
       forall chunk addr args dst s,
-      List.map env args = type_of_addressing addr ->
-      env dst = type_of_chunk chunk ->
+      subtype_list (map env args) (type_of_addressing addr) = true ->
+      subtype (type_of_chunk chunk) (env dst) = true ->
       valid_successor s ->
       wt_instr (Iload chunk addr args dst s)
   | wt_Istore:
       forall chunk addr args src s,
-      List.map env args = type_of_addressing addr ->
-      env src = type_of_chunk chunk ->
+      subtype_list (map env args) (type_of_addressing addr) = true ->
+      subtype (env src) (type_of_chunk_use chunk) = true ->
       valid_successor s ->
       wt_instr (Istore chunk addr args src s)
   | wt_Icall:
       forall sig ros args res s,
-      match ros with inl r => env r = Tint | inr s => True end ->
-      List.map env args = sig.(sig_args) ->
-      env res = proj_sig_res sig ->
+      match ros with inl r => subtype (env r) Tint = true | inr s => True end ->
+      subtype_list (map env args) sig.(sig_args) = true ->
+      subtype (proj_sig_res sig) (env res) = true ->
       valid_successor s ->
       wt_instr (Icall sig ros args res s)
   | wt_Itailcall:
       forall sig ros args,
-      match ros with inl r => env r = Tint | inr s => True end ->
+      match ros with inl r => subtype (env r) Tint = true | inr s => True end ->
       sig.(sig_res) = funct.(fn_sig).(sig_res) ->
-      List.map env args = sig.(sig_args) ->
+      subtype_list (map env args) sig.(sig_args) = true ->
       tailcall_possible sig ->
       wt_instr (Itailcall sig ros args)
   | wt_Ibuiltin:
       forall ef args res s,
-      List.map env args = (ef_sig ef).(sig_args) ->
-      env res = proj_sig_res (ef_sig ef) ->
+      subtype_list (map env args) (ef_sig ef).(sig_args) = true ->
+      subtype (proj_sig_res (ef_sig ef)) (env res) = true ->
       valid_successor s ->
       wt_instr (Ibuiltin ef args res s)
   | wt_Icond:
       forall cond args s1 s2,
-      List.map env args = type_of_condition cond ->
+      subtype_list (map env args) (type_of_condition cond) = true ->
       valid_successor s1 ->
       valid_successor s2 ->
       wt_instr (Icond cond args s1 s2)
   | wt_Ijumptable:
       forall arg tbl,
-      env arg = Tint ->
+      subtype (env arg) Tint = true ->
       (forall s, In s tbl -> valid_successor s) ->
       list_length_z tbl * 4 <= Int.max_unsigned ->
       wt_instr (Ijumptable arg tbl)
-  | wt_Ireturn: 
-      forall optres,
-      option_map env optres = funct.(fn_sig).(sig_res) ->
-      wt_instr (Ireturn optres).
+  | wt_Ireturn_none:
+      funct.(fn_sig).(sig_res) = None ->
+      wt_instr (Ireturn None)
+  | wt_Ireturn_some:
+      forall arg ty,
+      funct.(fn_sig).(sig_res) = Some ty ->
+      subtype (env arg) ty = true ->
+      wt_instr (Ireturn (Some arg)).
 
 End WT_INSTR.
 
@@ -139,12 +146,12 @@ End WT_INSTR.
 Record wt_function (f: function) (env: regenv): Prop :=
   mk_wt_function {
     wt_params:
-      List.map env f.(fn_params) = f.(fn_sig).(sig_args);
+      subtype_list f.(fn_sig).(sig_args) (map env f.(fn_params)) = true;
     wt_norepet:
       list_norepet f.(fn_params);
     wt_instrs:
       forall pc instr, 
-      f.(fn_code)!pc = Some instr -> wt_instr env f instr;
+      f.(fn_code)!pc = Some instr -> wt_instr f env instr;
     wt_entrypoint:
       valid_successor f f.(fn_entrypoint)
 }.
@@ -161,66 +168,138 @@ Definition wt_program (p: program): Prop :=
 
 (** * Type inference *)
 
-Section INFERENCE.
+(** Type inference reuses the generic solver for subtyping constraints
+  defined in module [Subtyping]. *)
 
-Local Open Scope error_monad_scope.
+Module RTLtypes <: TYPE_ALGEBRA.
 
-Variable f: function.
+Definition t := typ.
+Definition eq := typ_eq.
+Definition default := Tint.
 
-(** The type inference engine operates over a state that has two components:
-- [te_typ]: a partial map from pseudoregisters to types, for
-  pseudoregs whose types are already determined from their uses;
-- [te_eqs]: a list of pairs [(r1,r2)] of pseudoregisters, indicating that
-  [r1] and [r2] must have the same type because they are involved in a move
-  [r1 := r2] or [r2 := r1], but this type is not yet determined.
-*)
+Definition sub (t1 t2: typ): Prop := subtype t1 t2 = true.
 
-Record typenv : Type := Typenv {
-  te_typ: PTree.t typ;            (**r mapping reg -> known type *)
-  te_eqs: list (reg * reg)        (**r pairs of regs that must have same type *)
-}.
+Lemma sub_refl: forall ty, sub ty ty.
+Proof. unfold sub; destruct ty; auto. Qed.
 
-(** Determine that [r] must have type [ty]. *)
+Lemma sub_trans: forall ty1 ty2 ty3, sub ty1 ty2 -> sub ty2 ty3 -> sub ty1 ty3.
+Proof.
+  unfold sub; intros. destruct ty1; destruct ty2; try discriminate; destruct ty3; auto; discriminate.
+Qed.
 
-Definition type_reg (e: typenv) (r: reg) (ty: typ) : res typenv :=
-  match e.(te_typ)!r with
-  | None => OK {| te_typ := PTree.set r ty e.(te_typ); te_eqs := e.(te_eqs) |}
-  | Some ty' => if typ_eq ty ty' then OK e else Error (MSG "bad type for variable " :: POS r :: nil)
-  end.
+Definition sub_dec: forall ty1 ty2, {sub ty1 ty2} + {~sub ty1 ty2}.
+Proof.
+  unfold sub; intros. destruct (subtype ty1 ty2). left; auto. right; congruence.
+Defined.
 
-Fixpoint type_regs (e: typenv) (rl: list reg) (tyl: list typ) {struct rl}: res typenv :=
-  match rl, tyl with
-  | nil, nil => OK e
-  | r1::rs, ty1::tys => do e1 <- type_reg e r1 ty1; type_regs e1 rs tys
-  | _, _ => Error (msg "arity mismatch")
+Definition lub (ty1 ty2: typ) :=
+  match ty1, ty2 with
+  | Tint, Tint => Tint
+  | Tlong, Tlong => Tlong
+  | Tfloat, Tfloat => Tfloat
+  | Tsingle, Tsingle => Tsingle
+  | Tfloat, Tsingle => Tfloat
+  | Tsingle, Tfloat => Tfloat
+  | _, _ => default
   end.
 
-Definition type_ros (e: typenv) (ros: reg + ident) : res typenv :=
-  match ros with
-  | inl r => type_reg e r Tint
-  | inr s => OK e
-  end.
+Lemma lub_left: forall x y z, sub x z -> sub y z -> sub x (lub x y).
+Proof.
+  unfold sub, lub; intros. destruct x; destruct y; auto; destruct z; discriminate.
+Qed.
 
-(** Determine that [r1] and [r2] must have the same type.  If none of
-  [r1] and [r2] has known type, just record an equality constraint.
-  The boolean result is [true] if one of the types of [r1] and [r2]
-  was previously unknown and could be determined because the other type is
-  known.  Otherwise, [te_typ] does not change and [false] is returned. *)
-
-Function type_move (e: typenv) (r1 r2: reg) : res (bool * typenv) :=
-  match e.(te_typ)!r1, e.(te_typ)!r2 with
-  | None, None =>
-      OK (false, {| te_typ := e.(te_typ); te_eqs := (r1, r2) :: e.(te_eqs) |})
-  | Some ty1, None =>
-      OK (true, {| te_typ := PTree.set r2 ty1 e.(te_typ); te_eqs := e.(te_eqs) |})
-  | None, Some ty2 =>
-      OK (true, {| te_typ := PTree.set r1 ty2 e.(te_typ); te_eqs := e.(te_eqs) |})
-  | Some ty1, Some ty2 =>
-      if typ_eq ty1 ty2
-      then OK (false, e)
-      else Error(MSG "ill-typed move between " :: POS r1 :: MSG " and " :: POS r2 :: nil)
+Lemma lub_right: forall x y z, sub x z -> sub y z -> sub y (lub x y).
+Proof.
+  unfold sub, lub; intros. destruct x; destruct y; auto; destruct z; discriminate.
+Qed.
+
+Lemma lub_min: forall x y z, sub x z -> sub y z -> sub (lub x y) z.
+Proof.
+  unfold sub, lub; intros. destruct x; destruct z; try discriminate; destruct y; auto; discriminate.
+Qed.
+
+Definition glb (ty1 ty2: typ) :=
+  match ty1, ty2 with
+  | Tint, Tint => Tint
+  | Tlong, Tlong => Tlong
+  | Tfloat, Tfloat => Tfloat
+  | Tsingle, Tsingle => Tsingle
+  | Tfloat, Tsingle => Tsingle
+  | Tsingle, Tfloat => Tsingle
+  | _, _ => default
   end.
 
+Lemma glb_left: forall x y z, sub z x -> sub z y -> sub (glb x y) x.
+Proof.
+  unfold sub, glb; intros. destruct x; destruct y; auto; destruct z; discriminate.
+Qed.
+
+Lemma glb_right: forall x y z, sub z x -> sub z y -> sub (glb x y) y.
+Proof.
+  unfold sub, glb; intros. destruct x; destruct y; auto; destruct z; discriminate.
+Qed.
+
+Lemma glb_max: forall x y z, sub z x -> sub z y -> sub z (glb x y).
+Proof.
+  unfold sub, glb; intros. destruct x; destruct z; try discriminate; destruct y; auto; discriminate.
+Qed.
+
+Definition low_bound (ty: typ) :=
+  match ty with Tfloat => Tsingle | _ => ty end.
+
+Definition high_bound (ty: typ) :=
+  match ty with Tsingle => Tfloat | _ => ty end.
+
+Lemma low_bound_sub: forall t, sub (low_bound t) t.
+Proof.
+  unfold sub; destruct t0; auto.
+Qed.
+
+Lemma low_bound_minorant: forall x y, sub x y -> sub (low_bound y) x.
+Proof.
+  unfold sub; intros. destruct x; destruct y; auto; discriminate.
+Qed.
+
+Lemma high_bound_sub: forall t, sub t (high_bound t).
+Proof.
+  unfold sub; destruct t0; auto.
+Qed.
+
+Lemma high_bound_majorant: forall x y, sub x y -> sub y (high_bound x).
+Proof.
+  unfold sub; intros. destruct x; destruct y; auto; discriminate.
+Qed.
+
+Definition weight (t: typ) :=
+  match t with Tfloat => 1%nat | _ => 0%nat end.
+
+Definition max_weight := 1%nat.
+
+Lemma weight_range: forall t, (weight t <= max_weight)%nat.
+Proof.
+  unfold max_weight; destruct t0; simpl; omega.
+Qed.
+
+Lemma weight_sub: forall x y, sub x y -> (weight x <= weight y)%nat.
+Proof.
+  unfold sub; intros. destruct x; destruct y; simpl; discriminate || omega.
+Qed.
+
+Lemma weight_sub_strict: forall x y, sub x y -> x <> y -> (weight x < weight y)%nat.
+Proof.
+  unfold sub, subtype; intros. destruct x; destruct y; simpl; congruence || omega.
+Qed.
+
+End RTLtypes.
+
+Module S := SubSolver(RTLtypes).
+
+Section INFERENCE.
+
+Local Open Scope error_monad_scope.
+
+Variable f: function.
+
 (** Checking the validity of successor nodes. *)
 
 Definition check_successor (s: node): res unit :=
@@ -235,15 +314,18 @@ Fixpoint check_successors (sl: list node): res unit :=
   | s1 :: sl' => do x <- check_successor s1; check_successors sl'
   end.
 
+(** Check structural constraints and process / record all type constraints. *)
+
+Definition type_ros (e: S.typenv) (ros: reg + ident) : res S.typenv :=
+  match ros with
+  | inl r => S.type_use e r Tint
+  | inr s => OK e
+  end.
+
 Definition is_move (op: operation) : bool :=
   match op with Omove => true | _ => false end.
 
-(** First pass: check structural constraints and process all type constraints
-  of the form [typeof(r) = ty] arising from the RTL instructions.
-  Simultaneously, record equations [typeof(r1) = typeof(r2)] arising
-  from move instructions that cannot be immediately resolved. *)
-
-Definition type_instr (e: typenv) (i: instruction) : res typenv :=
+Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
   match i with
   | Inop s =>
       do x <- check_successor s; OK e
@@ -251,28 +333,28 @@ Definition type_instr (e: typenv) (i: instruction) : res typenv :=
       do x <- check_successor s;
       if is_move op then
         match args with
-        | arg :: nil => do (changed, e') <- type_move e arg res; OK e'
+        | arg :: nil => do (changed, e') <- S.type_move e arg res; OK e'
         | _ => Error (msg "ill-formed move")
         end
       else
        (let (targs, tres) := type_of_operation op in
-        do e1 <- type_regs e args targs; type_reg e1 res tres)
+        do e1 <- S.type_uses e args targs; S.type_def e1 res tres)
   | Iload chunk addr args dst s =>
       do x <- check_successor s;
-      do e1 <- type_regs e args (type_of_addressing addr);
-      type_reg e1 dst (type_of_chunk chunk)
+      do e1 <- S.type_uses e args (type_of_addressing addr);
+      S.type_def e1 dst (type_of_chunk chunk)
   | Istore chunk addr args src s =>
       do x <- check_successor s;
-      do e1 <- type_regs e args (type_of_addressing addr);
-      type_reg e1 src (type_of_chunk chunk)
+      do e1 <- S.type_uses e args (type_of_addressing addr);
+      S.type_use e1 src (type_of_chunk_use chunk)
   | Icall sig ros args res s =>
       do x <- check_successor s;
       do e1 <- type_ros e ros;
-      do e2 <- type_regs e1 args sig.(sig_args);
-      type_reg e2 res (proj_sig_res sig)
+      do e2 <- S.type_uses e1 args sig.(sig_args);
+      S.type_def e2 res (proj_sig_res sig)
   | Itailcall sig ros args =>
       do e1 <- type_ros e ros;
-      do e2 <- type_regs e1 args sig.(sig_args);
+      do e2 <- S.type_uses e1 args sig.(sig_args);
       if opt_typ_eq sig.(sig_res) f.(fn_sig).(sig_res) then
         if tailcall_is_possible sig
         then OK e2
@@ -281,25 +363,25 @@ Definition type_instr (e: typenv) (i: instruction) : res typenv :=
   | Ibuiltin ef args res s =>
       let sig := ef_sig ef in
       do x <- check_successor s;
-      do e1 <- type_regs e args sig.(sig_args);
-      type_reg e1 res (proj_sig_res sig)
+      do e1 <- S.type_uses e args sig.(sig_args);
+      S.type_def e1 res (proj_sig_res sig)
   | Icond cond args s1 s2 =>
       do x1 <- check_successor s1;
       do x2 <- check_successor s2;
-      type_regs e args (type_of_condition cond)
+      S.type_uses e args (type_of_condition cond)
   | Ijumptable arg tbl =>
       do x <- check_successors tbl;
-      do e1 <- type_reg e arg Tint;
+      do e1 <- S.type_use e arg Tint;
       if zle (list_length_z tbl * 4) Int.max_unsigned then OK e1 else Error(msg "jumptable too big")
   | Ireturn optres =>
       match optres, f.(fn_sig).(sig_res) with
       | None, None => OK e
-      | Some r, Some t => type_reg e r t
+      | Some r, Some t => S.type_use e r t
       | _, _ => Error(msg "bad return")
       end
   end.
 
-Definition type_code (e: typenv): res typenv :=
+Definition type_code (e: S.typenv): res S.typenv :=
   PTree.fold
     (fun re pc i =>
        match re with
@@ -312,179 +394,38 @@ Definition type_code (e: typenv): res typenv :=
        end)
     f.(fn_code) (OK e).
 
-(** Second pass: iteratively process the remaining equality constraints
-    arising out of "move" instructions *)
-
-Definition equations := list (reg * reg).
-
-Fixpoint solve_rec (e: typenv) (changed: bool) (q: equations) : res (typenv * bool) :=
-  match q with 
-  | nil =>
-      OK (e, changed)
-  | (r1, r2) :: q' =>
-      do (changed1, e1) <- type_move e r1 r2; solve_rec e1 (changed || changed1) q'
-  end.
-
-Lemma type_move_length:
-  forall e r1 r2 changed e',
-  type_move e r1 r2 = OK (changed, e') ->
-  if changed 
-  then List.length e'.(te_eqs) = List.length e.(te_eqs)
-  else (List.length e'.(te_eqs) <= S (List.length e.(te_eqs)))%nat.
-Proof.
-  intros. functional inversion H; subst; simpl; omega.
-Qed.
-
-Lemma solve_rec_length:
-  forall q e changed e' changed',
-  solve_rec e changed q = OK (e', changed') ->
-  (List.length e'.(te_eqs) <= List.length e.(te_eqs) + List.length q)%nat.
-Proof.
-  induction q; simpl; intros. 
-- inv H. omega.
-- destruct a as [r1 r2]. monadInv H. 
-  assert (length (te_eqs x0) <= S (length (te_eqs e)))%nat.
-  {
-    exploit type_move_length; eauto. destruct x; omega. 
-  }
-  exploit IHq; eauto. 
-  omega.
-Qed.
-
-Lemma solve_rec_shorter:
-  forall q e e',
-  solve_rec e false q = OK (e', true) ->
-  (List.length e'.(te_eqs) < List.length e.(te_eqs) + List.length q)%nat.
-Proof.
-  induction q; simpl; intros.
-- discriminate.
-- destruct a as [r1 r2]; monadInv H.
-  exploit type_move_length; eauto. destruct x; intros. 
-  exploit solve_rec_length; eauto. omega.
-  exploit IHq; eauto. omega.
-Qed.
-
-Definition length_typenv (e: typenv) := length e.(te_eqs).
-
-Function solve_equations (e: typenv) {measure length_typenv e} : res typenv :=
-  match solve_rec {| te_typ := e.(te_typ); te_eqs := nil |} false e.(te_eqs) with
-  | OK (e', false) => OK e                    (**r no progress made: terminate *)
-  | OK (e', true) => solve_equations e'       (**r at least one type changed: iterate *)
-  | Error msg => Error msg
-  end.
-Proof.
-  intros. exploit solve_rec_shorter; eauto. 
-Qed.
-
-(** The final type assignment aribtrarily gives type [Tint] to the 
-  pseudoregs that are still unconstrained at the end of type inference. *)
-
-Definition make_regenv (e: typenv) : regenv :=
-  fun r => match e.(te_typ)!r with Some ty => ty | None => Tint end.
+(** S remaining constraints *)
 
 Definition check_params_norepet (params: list reg): res unit :=
   if list_norepet_dec Reg.eq params then OK tt else Error(msg "duplicate parameters").
 
 Definition type_function : res regenv :=
-  do e1 <- type_code {| te_typ := PTree.empty typ; te_eqs := nil |};
-  do e2 <- type_regs e1 f.(fn_params) f.(fn_sig).(sig_args);
-  do e3 <- solve_equations e2;
+  do e1 <- type_code S.initial;
+  do e2 <- S.type_defs e1 f.(fn_params) f.(fn_sig).(sig_args);
+  do te <- S.solve e2;
   do x1 <- check_params_norepet f.(fn_params);
   do x2 <- check_successor f.(fn_entrypoint);
-  OK (make_regenv e3).
+  OK te.
 
 (** ** Soundness proof *)
 
-(** What it means for a final type assignment [te] to satisfy the constraints
-  collected so far in the [e] state. *)
-
-Definition satisf (te: regenv) (e: typenv) : Prop :=
-     (forall r ty, e.(te_typ)!r = Some ty -> te r = ty)
-  /\ (forall r1 r2, In (r1, r2) e.(te_eqs) -> te r1 = te r2).
-
-Remark type_reg_incr:
-  forall e r ty e' te, type_reg e r ty = OK e' -> satisf te e' -> satisf te e.
-Proof.
-  unfold type_reg; intros; destruct (e.(te_typ)!r) eqn:E.
-  destruct (typ_eq ty t); inv H. auto.
-  inv H. destruct H0 as [A B]. split; intros.
-  apply A. simpl. rewrite PTree.gso; auto. congruence. 
-  apply B. simpl; auto. 
-Qed.
-
-Hint Resolve type_reg_incr: ty.
-
-Remark type_regs_incr:
-  forall rl tyl e e' te, type_regs e rl tyl = OK e' -> satisf te e' -> satisf te e.
-Proof.
-  induction rl; simpl; intros; destruct tyl; try discriminate.
-  inv H; eauto with ty.
-  monadInv H. eauto with ty.
-Qed.
-
-Hint Resolve type_regs_incr: ty.
-
 Remark type_ros_incr:
-  forall e ros e' te, type_ros e ros = OK e' -> satisf te e' -> satisf te e.
+  forall e ros e' te, type_ros e ros = OK e' -> S.satisf te e' -> S.satisf te e.
 Proof.
   unfold type_ros; intros. destruct ros. eauto with ty. inv H; auto with ty.
 Qed.
 
 Hint Resolve type_ros_incr: ty.
 
-Remark type_move_incr:
-  forall e r1 r2 changed e' te,
-  type_move e r1 r2 = OK (changed, e') ->
-  satisf te e' -> satisf te e.
-Proof.
-  intros. destruct H0 as [A B]. functional inversion H; subst; simpl in *.
-- split; auto. 
-- split; intros. apply A. rewrite PTree.gso; auto. congruence. auto. 
-- split; intros. apply A. rewrite PTree.gso; auto. congruence. auto.
-- split; auto.
-Qed.
-
-Hint Resolve type_move_incr: ty.
-
-Lemma type_reg_sound:
-  forall e r ty e' te, type_reg e r ty = OK e' -> satisf te e' -> te r = ty.
-Proof.
-  unfold type_reg; intros. destruct H0 as [A B]. 
-  destruct (e.(te_typ)!r) as [t|] eqn:E.
-  destruct (typ_eq ty t); inv H. apply A; auto.
-  inv H. apply A. simpl. apply PTree.gss.
-Qed.
-
-Lemma type_regs_sound:
-  forall rl tyl e e' te, type_regs e rl tyl = OK e' -> satisf te e' -> map te rl = tyl.
-Proof.
-  induction rl; simpl; intros; destruct tyl; try discriminate.
-- auto.
-- monadInv H. f_equal; eauto. eapply type_reg_sound. eauto. eauto with ty. 
-Qed.
-
 Lemma type_ros_sound:
-  forall e ros e' te, type_ros e ros = OK e' -> satisf te e' ->
-  match ros with inl r => te r = Tint | inr s => True end.
+  forall e ros e' te, type_ros e ros = OK e' -> S.satisf te e' ->
+  match ros with inl r => subtype (te r) Tint = true | inr s => True end.
 Proof.
   unfold type_ros; intros. destruct ros. 
-  eapply type_reg_sound; eauto.
+  eapply S.type_use_sound; eauto.
   auto.
 Qed.
 
-Lemma type_move_sound:
-  forall e r1 r2 changed e' te,
-  type_move e r1 r2 = OK (changed, e') -> satisf te e' -> te r1 = te r2.
-Proof.
-  intros. destruct H0 as [A B]. functional inversion H; subst; simpl in *.
-- apply B; auto.
-- rewrite (A r1 ty1). rewrite (A r2 ty1). auto. 
-  apply PTree.gss. rewrite PTree.gso; auto. congruence.
-- rewrite (A r1 ty2). rewrite (A r2 ty2). auto. 
-  rewrite PTree.gso; auto. congruence. apply PTree.gss.
-- erewrite ! A; eauto. 
-Qed.
-
 Lemma check_successor_sound:
   forall s x, check_successor s = OK x -> valid_successor f s.
 Proof.
@@ -502,9 +443,20 @@ Proof.
   monadInv H. destruct H0. subst a; eauto with ty. eauto. 
 Qed.
 
+Remark subtype_list_charact:
+  forall tyl1 tyl2,
+  subtype_list tyl1 tyl2 = true <-> list_forall2 RTLtypes.sub tyl1 tyl2.
+Proof.
+  unfold RTLtypes.sub; intros; split; intros.
+  revert tyl1 tyl2 H. induction tyl1; destruct tyl2; simpl; intros; try discriminate.
+  constructor.
+  InvBooleans. constructor; auto. 
+  revert tyl1 tyl2 H. induction 1; simpl. auto. rewrite H; rewrite IHlist_forall2; auto.
+Qed.
+
 Lemma type_instr_incr:
   forall e i e' te,
-  type_instr e i = OK e' -> satisf te e' -> satisf te e.
+  type_instr e i = OK e' -> S.satisf te e' -> S.satisf te e.
 Proof.
   intros; destruct i; try (monadInv H); eauto with ty.
 - (* op *)
@@ -526,7 +478,7 @@ Qed.
 
 Lemma type_instr_sound:
   forall e i e' te,
-  type_instr e i = OK e' -> satisf te e' -> wt_instr te f i.
+  type_instr e i = OK e' -> S.satisf te e' -> wt_instr f te i.
 Proof.
   intros; destruct i; try (monadInv H); simpl.
 - (* nop *)
@@ -537,25 +489,28 @@ Proof.
   + assert (o = Omove) by (unfold is_move in ISMOVE; destruct o; congruence).
     subst o.
     destruct l; try discriminate. destruct l; monadInv EQ0.
-    constructor. eapply type_move_sound; eauto. eauto with ty.
+    constructor. eapply S.type_move_sound; eauto. eauto with ty.
   + destruct (type_of_operation o) as [targs tres] eqn:TYOP. monadInv EQ0.
     apply wt_Iop. 
     unfold is_move in ISMOVE; destruct o; congruence.
-    rewrite TYOP. f_equal. eapply type_regs_sound; eauto with ty. eapply type_reg_sound; eauto.
+    rewrite TYOP. rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
+    rewrite TYOP. eapply S.type_def_sound; eauto with ty.
     eauto with ty.
 - (* load *)
   constructor.
-  eapply type_regs_sound; eauto with ty. eapply type_reg_sound; eauto.
+  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
+  eapply S.type_def_sound; eauto with ty.
   eauto with ty.
 - (* store *)
   constructor.
-  eapply type_regs_sound; eauto with ty. eapply type_reg_sound; eauto.
+  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
+  eapply S.type_use_sound; eauto with ty.
   eauto with ty.
 - (* call *)
   constructor. 
-  eapply type_ros_sound; eauto with ty. 
-  eapply type_regs_sound; eauto with ty.
-  eapply type_reg_sound; eauto.
+  eapply type_ros_sound; eauto with ty.
+  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
+  eapply S.type_def_sound; eauto with ty.
   eauto with ty.
 - (* tailcall *)
   destruct (opt_typ_eq (sig_res s) (sig_res (fn_sig f))); try discriminate.
@@ -563,206 +518,86 @@ Proof.
   constructor.
   eapply type_ros_sound; eauto with ty. 
   auto.
-  eapply type_regs_sound; eauto with ty.
+  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
   apply tailcall_is_possible_correct; auto.
 - (* builtin *)
   constructor.
-  eapply type_regs_sound; eauto with ty.
-  eapply type_reg_sound; eauto.
+  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
+  eapply S.type_def_sound; eauto with ty.
   eauto with ty.
 - (* cond *)
   constructor.
-  eapply type_regs_sound; eauto with ty.
+  rewrite subtype_list_charact. eapply S.type_uses_sound; eauto with ty.
   eauto with ty.
   eauto with ty.
 - (* jumptable *)
   destruct (zle (list_length_z l * 4) Int.max_unsigned); inv EQ2.
   constructor.
-  eapply type_reg_sound; eauto.
+  eapply S.type_use_sound; eauto.
   eapply check_successors_sound; eauto. 
   auto.
 - (* return *)
   simpl in H. destruct o as [r|] eqn: RET; destruct (sig_res (fn_sig f)) as [t|] eqn: RES; try discriminate.
-  constructor. simpl. rewrite RES. f_equal. eapply type_reg_sound; eauto.
-  inv H. constructor. rewrite RES; auto. 
+  econstructor. eauto. eapply S.type_use_sound; eauto with ty.
+  inv H. constructor. auto. 
 Qed.
 
 Lemma type_code_sound:
-  forall e e' te,
+  forall pc i e e' te,
   type_code e = OK e' ->
-  forall pc i, f.(fn_code)!pc = Some i -> satisf te e' -> wt_instr te f i.
+  f.(fn_code)!pc = Some i -> S.satisf te e' -> wt_instr f te i.
 Proof.
-  intros e0 e1 te TCODE.
+  intros pc i e0 e1 te TCODE.
   set (P := fun c opte =>
          match opte with
          | Error _ => True
-         | OK e' => forall pc i, c!pc = Some i -> satisf te e' -> wt_instr te f i
+         | OK e' => c!pc = Some i -> S.satisf te e' -> wt_instr f te i
          end).
-  assert (P f.(fn_code) (OK e1)).
-  {
-    rewrite <- TCODE. unfold type_code. apply PTree_Properties.fold_rec; unfold P; intros. 
-    - (* extensionality *)
-      destruct a; auto. intros. rewrite <- H in H1. eapply H0; eauto. 
-    - (* base case *)
-      rewrite PTree.gempty in H; discriminate.
-    - (* inductive case *)
-      destruct a as [e|?]; auto. 
-      destruct (type_instr e v) as [e'|?] eqn:TYINSTR; auto.
-      intros. rewrite PTree.gsspec in H2. destruct (peq pc k). 
-      inv H2. eapply type_instr_sound; eauto. 
-      eapply H1; eauto. eapply type_instr_incr; eauto. 
-  }
-  intros; eapply H; eauto. 
-Qed.
-
-Lemma solve_rec_incr:
-  forall te q e changed e' changed',
-  solve_rec e changed q = OK (e', changed') -> satisf te e' -> satisf te e.
-Proof.
-  induction q; simpl; intros.
-- inv H; auto.
-- destruct a as [r1 r2]; monadInv H. eauto with ty. 
-Qed.
-
-Lemma solve_rec_sound:
-  forall r1 r2 te q e changed e' changed',
-  solve_rec e changed q = OK (e', changed') -> satisf te e' -> In (r1, r2) q -> te r1 = te r2.
-Proof.
-  induction q; simpl; intros.
-- contradiction. 
-- destruct a as [r3 r4]; monadInv H. destruct H1 as [A|A].
-  inv A. eapply type_move_sound; eauto. eapply solve_rec_incr; eauto. 
-  eapply IHq; eauto.
-Qed.
-
-Lemma solve_rec_false:
-  forall q e changed e',
-  solve_rec e changed q = OK (e', false) -> changed = false.
-Proof.
-  induction q; simpl; intros. 
-- inv H; auto.
-- destruct a as [r1 r2]; monadInv H. exploit IHq; eauto. rewrite orb_false_iff. tauto. 
-Qed.
-
-Lemma solve_rec_solved:
-  forall r1 r2 q e changed e',
-  solve_rec e changed q = OK (e', false) ->
-  In (r1, r2) q -> make_regenv e r1 = make_regenv e r2.
-Proof.
-  induction q; simpl; intros.
-- contradiction.
-- destruct a as [r3 r4]; monadInv H.
-  assert (x = false). 
-  { exploit solve_rec_false; eauto. rewrite orb_false_iff. tauto. }
-  subst x. functional inversion EQ.
-  + destruct H0 as [A|A]. 
-    inv A. unfold make_regenv. rewrite H1; rewrite H2; auto.
-    subst x0. exploit IHq; eauto.
-  + destruct H0 as [A|A]. 
-    inv A. unfold make_regenv. rewrite H1; rewrite H2; auto.
-    subst x0. exploit IHq; eauto.
-Qed.
-
-Lemma solve_equations_incr:
-  forall te e e', solve_equations e = OK e' -> satisf te e' -> satisf te e.
-Proof.
-  intros te e0; functional induction (solve_equations e0); intros.
-- inv H. auto.
-- exploit solve_rec_incr; eauto. intros [A B]. split; intros.
-  apply A; auto.
-  eapply solve_rec_sound; eauto. 
-- discriminate.
-Qed.
-
-Lemma solve_equations_sound:
-  forall e e', solve_equations e = OK e' -> satisf (make_regenv e') e'.
-Proof.
-  intros e0; functional induction (solve_equations e0); intros.
-- inv H. split; intros. 
-  unfold make_regenv; rewrite H; auto. 
-  exploit solve_rec_solved; eauto.
-- eauto.
-- discriminate.
+  change (P f.(fn_code) (OK e1)).
+  rewrite <- TCODE. unfold type_code. apply PTree_Properties.fold_rec; unfold P; intros. 
+  - (* extensionality *)
+    destruct a; auto; intros. rewrite <- H in H1. eapply H0; eauto. 
+  - (* base case *)
+    rewrite PTree.gempty in H; discriminate.
+  - (* inductive case *)
+    destruct a as [e|?]; auto. 
+    destruct (type_instr e v) as [e'|?] eqn:TYINSTR; auto.
+    intros. rewrite PTree.gsspec in H2. destruct (peq pc k). 
+    inv H2. eapply type_instr_sound; eauto. 
+    eapply H1; eauto. eapply type_instr_incr; eauto.
 Qed.
 
 Theorem type_function_correct:
   forall env, type_function = OK env -> wt_function f env.
 Proof.
-  unfold type_function; intros. monadInv H. 
-  exploit solve_equations_sound; eauto. intros SAT1.
-  exploit solve_equations_incr; eauto. intros SAT0.
-  exploit type_regs_incr; eauto. intros SAT.
+  unfold type_function; intros. monadInv H.
+  assert (SAT0: S.satisf env x0) by (eapply S.solve_sound; eauto).
+  assert (SAT1: S.satisf env x) by (eauto with ty).
   constructor.
 - (* type of parameters *)
-  eapply type_regs_sound; eauto.
+  rewrite subtype_list_charact. eapply S.type_defs_sound; eauto.
 - (* parameters are unique *)
   unfold check_params_norepet in EQ2. 
   destruct (list_norepet_dec Reg.eq (fn_params f)); inv EQ2; auto. 
 - (* instructions are well typed *)
-  intros; eapply type_code_sound; eauto.
+  intros. eapply type_code_sound; eauto. 
 - (* entry point is valid *)
   eauto with ty. 
 Qed.
 
 (** ** Completeness proof *)
 
-Lemma type_reg_complete:
-  forall te e r ty,
-  satisf te e -> te r = ty -> exists e', type_reg e r ty = OK e' /\ satisf te e'.
-Proof.
-  intros. unfold type_reg. 
-  destruct ((te_typ e)!r) as [ty'|] eqn: E.
-  exists e; split; auto. replace ty' with ty. apply dec_eq_true.
-  exploit (proj1 H); eauto. congruence. 
-  econstructor; split. reflexivity. 
-  destruct H as [A B]; split; simpl; intros; auto. 
-  rewrite PTree.gsspec in H. destruct (peq r0 r); auto. congruence. 
-Qed.
-
-Lemma type_regs_complete:
-  forall te rl tyl e,
-  satisf te e -> map te rl = tyl -> exists e', type_regs e rl tyl = OK e' /\ satisf te e'.
-Proof.
-  induction rl; simpl; intros; subst tyl.
-  exists e; auto.
-  destruct (type_reg_complete te e a (te a)) as [e1 [A B]]; auto.
-  exploit IHrl; eauto. intros [e' [C D]]. 
-  exists e'; split; auto. rewrite A; simpl; rewrite C; auto.
-Qed.
-
 Lemma type_ros_complete:
   forall te ros e,
-  satisf te e ->
-  match ros with inl r => te r = Tint | inr s => True end ->
-  exists e', type_ros e ros = OK e' /\ satisf te e'.
+  S.satisf te e ->
+  match ros with inl r => subtype (te r) Tint = true | inr s => True end ->
+  exists e', type_ros e ros = OK e' /\ S.satisf te e'.
 Proof.
   intros; destruct ros; simpl. 
-  eapply type_reg_complete; eauto.
+  eapply S.type_use_complete; eauto.
   exists e; auto.
 Qed.
 
-Lemma type_move_complete:
-  forall te r1 r2 e,
-  satisf te e ->
-  te r1 = te r2 ->
-  exists changed e', type_move e r1 r2 = OK (changed, e') /\ satisf te e'.
-Proof.
-  intros. functional induction (type_move e r1 r2). 
-- econstructor; econstructor; split; eauto. 
-  destruct H as [A B]; split; simpl; intros; auto. 
-  destruct H; auto. congruence.
-- econstructor; econstructor; split; eauto. 
-  destruct H as [A B]; split; simpl; intros; auto. 
-  rewrite PTree.gsspec in H. destruct (peq r r2); auto. 
-  subst. exploit A; eauto. congruence. 
-- econstructor; econstructor; split; eauto. 
-  destruct H as [A B]; split; simpl; intros; auto. 
-  rewrite PTree.gsspec in H. destruct (peq r r1); auto. 
-  subst. exploit A; eauto. congruence.
-- econstructor; econstructor; split; eauto.
-- destruct H as [A B]. apply A in e0. apply A in e1. congruence. 
-Qed.
-
 Lemma check_successor_complete:
   forall s, valid_successor f s -> check_successor s = OK tt.
 Proof.
@@ -772,46 +607,51 @@ Qed.
 
 Lemma type_instr_complete:
   forall te e i,
-  satisf te e ->
-  wt_instr te f i ->
-  exists e', type_instr e i = OK e' /\ satisf te e'.
+  S.satisf te e ->
+  wt_instr f te i ->
+  exists e', type_instr e i = OK e' /\ S.satisf te e'.
 Proof.
   induction 2; simpl.
 - (* nop *)
   econstructor; split. rewrite check_successor_complete; simpl; eauto. auto.
 - (* move *)
-  exploit type_move_complete; eauto. intros (changed & e' & A & B).
+  exploit S.type_move_complete; eauto. intros (changed & e' & A & B).
   exists e'; split. rewrite check_successor_complete by auto; simpl. rewrite A; auto. auto.
 - (* other op *)
-  destruct (type_of_operation op) as [targ tres]. injection H1; clear H1; intros. 
-  exploit type_regs_complete. eauto. eauto. intros [e1 [A B]].
-  exploit type_reg_complete. eexact B. eauto. intros [e2 [C D]].
+  destruct (type_of_operation op) as [targ tres]. simpl in *.
+  rewrite subtype_list_charact in H1.
+  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.type_def_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   replace (is_move op) with false. rewrite A; simpl; rewrite C; auto.
   destruct op; reflexivity || congruence.
 - (* load *)
-  exploit type_regs_complete. eauto. eauto. intros [e1 [A B]].
-  exploit type_reg_complete. eexact B. eauto. intros [e2 [C D]].
+  rewrite subtype_list_charact in H0.
+  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.type_def_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   rewrite A; simpl; rewrite C; auto.
 - (* store *)
-  exploit type_regs_complete. eauto. eauto. intros [e1 [A B]].
-  exploit type_reg_complete. eexact B. eauto. intros [e2 [C D]].
+  rewrite subtype_list_charact in H0.
+  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.type_use_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   rewrite A; simpl; rewrite C; auto.
 - (* call *)
   exploit type_ros_complete. eauto. eauto. intros [e1 [A B]].
-  exploit type_regs_complete. eauto. eauto. intros [e2 [C D]].
-  exploit type_reg_complete. eexact D. eauto. intros [e3 [E F]].
+  rewrite subtype_list_charact in H1.
+  exploit S.type_uses_complete. eauto. eauto. intros [e2 [C D]].
+  exploit S.type_def_complete. eexact D. eauto. intros [e3 [E F]].
   exists e3; split; auto. 
   rewrite check_successor_complete by auto; simpl. 
   rewrite A; simpl; rewrite C; simpl; rewrite E; auto.
 - (* tailcall *)
   exploit type_ros_complete. eauto. eauto. intros [e1 [A B]].
-  exploit type_regs_complete. eauto. eauto. intros [e2 [C D]].
+  rewrite subtype_list_charact in H2.
+  exploit S.type_uses_complete. eauto. eauto. intros [e2 [C D]].
   exists e2; split; auto. 
   rewrite A; simpl; rewrite C; simpl. 
   rewrite H1; rewrite dec_eq_true. 
@@ -821,41 +661,43 @@ Proof.
   exploit (H3 a); auto. intros. destruct a; try contradiction. apply IHl.
   intros; apply H3; auto. 
 - (* builtin *)
-  exploit type_regs_complete. eauto. eauto. intros [e1 [A B]].
-  exploit type_reg_complete. eexact B. eauto. intros [e2 [C D]].
+  rewrite subtype_list_charact in H0.
+  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.type_def_complete. eexact B. eauto. intros [e2 [C D]].
   exists e2; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   rewrite A; simpl; rewrite C; auto.
 - (* cond *)
-  exploit type_regs_complete. eauto. eauto. intros [e1 [A B]].
+  rewrite subtype_list_charact in H0.
+  exploit S.type_uses_complete. eauto. eauto. intros [e1 [A B]].
   exists e1; split; auto.
   rewrite check_successor_complete by auto; simpl. 
   rewrite check_successor_complete by auto; simpl.
   auto.
 - (* jumptbl *)
-  exploit type_reg_complete. eauto. eauto. intros [e1 [A B]].
+  exploit S.type_use_complete. eauto. eauto. intros [e1 [A B]].
   exists e1; split; auto.
   replace (check_successors tbl) with (OK tt). simpl. 
   rewrite A; simpl. apply zle_true; auto. 
   revert H1. generalize tbl. induction tbl0; simpl; intros. auto. 
   rewrite check_successor_complete by auto; simpl.
   apply IHtbl0; intros; auto.
-- (* return *)
-  rewrite <- H0. destruct optres; simpl.
-  apply type_reg_complete; auto.
-  exists e; auto.
+- (* return none *)
+  rewrite H0. exists e; auto.
+- (* return some *)
+  rewrite H0. apply S.type_use_complete; auto.
 Qed.
 
 Lemma type_code_complete:
   forall te e,
-  (forall pc instr, f.(fn_code)!pc = Some instr -> wt_instr te f instr) ->
-  satisf te e ->
-  exists e', type_code e = OK e' /\ satisf te e'.
+  (forall pc instr, f.(fn_code)!pc = Some instr -> wt_instr f te instr) ->
+  S.satisf te e ->
+  exists e', type_code e = OK e' /\ S.satisf te e'.
 Proof.
   intros te e0 WTC SAT0.
   set (P := fun c res =>
-        (forall pc i, c!pc = Some i -> wt_instr te f i) ->
-        exists e', res = OK e' /\ satisf te e').
+        (forall pc i, c!pc = Some i -> wt_instr f te i) ->
+        exists e', res = OK e' /\ S.satisf te e').
   assert (P f.(fn_code) (type_code e0)).
   {
     unfold type_code. apply PTree_Properties.fold_rec; unfold P; intros.
@@ -871,49 +713,16 @@ Proof.
   apply H; auto.
 Qed.
 
-Lemma solve_rec_complete:
-  forall te q e changed,
-  satisf te e ->
-  (forall r1 r2, In (r1, r2) q -> te r1 = te r2) ->
-  exists e' changed', solve_rec e changed q = OK(e', changed') /\ satisf te e'.
-Proof.
-  induction q; simpl; intros. 
-- exists e; exists changed; auto.
-- destruct a as [r3 r4]. 
-  exploit type_move_complete. eauto. apply H0. left; eauto. 
-  intros (changed1 & e1 & A & B).
-  destruct (IHq e1 (changed || changed1)) as (e2 & changed2 & C & D); auto.
-  exists e2; exists changed2; split; auto. rewrite A; simpl; rewrite C; auto.
-Qed.
-
-Lemma solve_equations_complete:
-  forall te e,
-  satisf te e ->
-  exists e', solve_equations e = OK e' /\ satisf te e'.
-Proof.
-  intros te e0. functional induction (solve_equations e0); intros.
-- exists e; auto. 
-- destruct (solve_rec_complete te (te_eqs e) {| te_typ := te_typ e; te_eqs := nil |} false)
-  as (e1 & changed1 & A & B). 
-  destruct H; split; intros. apply H; auto. contradiction.
-  exact (proj2 H). 
-  rewrite e0 in A. inv A. auto. 
-- destruct (solve_rec_complete te (te_eqs e) {| te_typ := te_typ e; te_eqs := nil |} false)
-  as (e1 & changed1 & A & B). 
-  destruct H; split; intros. apply H; auto. contradiction.
-  exact (proj2 H).
-  congruence.
-Qed.
-
 Theorem type_function_complete:
   forall te, wt_function f te -> exists te, type_function = OK te.
 Proof.
   intros. destruct H. 
-  destruct (type_code_complete te {| te_typ := PTree.empty typ; te_eqs := nil |}) as (e1 & A & B).
-  auto. split; simpl; intros. rewrite PTree.gempty in H; congruence. contradiction.
-  destruct (type_regs_complete te f.(fn_params) f.(fn_sig).(sig_args) e1) as (e2 & C & D); auto.
-  destruct (solve_equations_complete te e2) as (e3 & E & F); auto.
-  exists (make_regenv e3); unfold type_function. 
+  destruct (type_code_complete te S.initial) as (e1 & A & B).
+  auto. apply S.satisf_initial. 
+  destruct (S.type_defs_complete te f.(fn_params) f.(fn_sig).(sig_args) e1) as (e2 & C & D); auto.
+  rewrite <- subtype_list_charact; auto.
+  destruct (S.solve_complete te e2) as (te' & E); auto.
+  exists te'; unfold type_function.
   rewrite A; simpl. rewrite C; simpl. rewrite E; simpl. 
   unfold check_params_norepet. rewrite pred_dec_true; auto. simpl. 
   rewrite check_successor_complete by auto. auto. 
@@ -973,32 +782,52 @@ Qed.
 
 Lemma wt_exec_Iop:
   forall (ge: genv) env f sp op args res s rs m v,
-  wt_instr env f (Iop op args res s) ->
+  wt_instr f env (Iop op args res s) ->
   eval_operation ge sp op rs##args m = Some v ->
   wt_regset env rs ->
   wt_regset env (rs#res <- v).
 Proof.
   intros. inv H. 
-  simpl in H0. inv H0. apply wt_regset_assign; auto. rewrite <- H4; apply H1.
-  apply wt_regset_assign; auto. 
-  replace (env res) with (snd (type_of_operation op)).
+  simpl in H0. inv H0. apply wt_regset_assign; auto. 
+  eapply Val.has_subtype; eauto. 
+  apply wt_regset_assign; auto.
+  eapply Val.has_subtype; eauto.
   eapply type_of_operation_sound; eauto.
-  rewrite <- H7; auto.
 Qed.
 
 Lemma wt_exec_Iload:
   forall env f chunk addr args dst s m a v rs,
-  wt_instr env f (Iload chunk addr args dst s) ->
+  wt_instr f env (Iload chunk addr args dst s) ->
   Mem.loadv chunk m a = Some v ->
   wt_regset env rs ->
   wt_regset env (rs#dst <- v).
 Proof.
-  intros. destruct a; simpl in H0; try discriminate.
+  intros. destruct a; simpl in H0; try discriminate. inv H.
   apply wt_regset_assign; auto. 
-  inv H. rewrite H8. 
+  eapply Val.has_subtype; eauto. 
   eapply Mem.load_type; eauto.
 Qed.
 
+Lemma wt_exec_Ibuiltin:
+  forall env f ef (ge: genv) args res s vargs m t vres m' rs,
+  wt_instr f env (Ibuiltin ef args res s) ->
+  external_call ef ge vargs m t vres m' ->
+  wt_regset env rs ->
+  wt_regset env (rs#res <- vres).
+Proof.
+  intros. inv H. 
+  apply wt_regset_assign; auto. 
+  eapply Val.has_subtype; eauto.
+  eapply external_call_well_typed; eauto.
+Qed.
+
+Lemma wt_instr_at:
+  forall f env pc i,
+  wt_function f env -> f.(fn_code)!pc = Some i -> wt_instr f env i.
+Proof.
+  intros. inv H. eauto. 
+Qed.
+
 Inductive wt_stackframes: list stackframe -> option typ -> Prop :=
   | wt_stackframes_nil:
       wt_stackframes nil (Some Tint)
@@ -1006,7 +835,7 @@ Inductive wt_stackframes: list stackframe -> option typ -> Prop :=
       forall s res f sp pc rs env tyres,
       wt_function f env ->
       wt_regset env rs ->
-      env res = match tyres with None => Tint | Some t => t end ->
+      subtype (match tyres with None => Tint | Some t => t end) (env res) = true ->
       wt_stackframes s (sig_res (fn_sig f)) ->
       wt_stackframes (Stackframe res f sp pc rs :: s) tyres.
 
@@ -1042,24 +871,13 @@ Lemma subject_reduction:
   forall (WT: wt_state st1), wt_state st2.
 Proof.
   induction 1; intros; inv WT;
-  try (generalize (wt_instrs _ _ WT_FN pc _ H);
-       intro WT_INSTR;
-       inv WT_INSTR).
+  try (generalize (wt_instrs _ _ WT_FN pc _ H); intros WTI).
   (* Inop *)
   econstructor; eauto.
   (* Iop *)
-  econstructor; eauto.
-  apply wt_regset_assign. auto. 
-  simpl in H0. inv H0. rewrite <- H3. apply WT_RS.
-  econstructor; eauto.
-  apply wt_regset_assign. auto.
-  replace (env res) with (snd (type_of_operation op)).
-  eapply type_of_operation_sound; eauto.
-  rewrite <- H6. reflexivity.
+  econstructor; eauto. eapply wt_exec_Iop; eauto.
   (* Iload *)
-  econstructor; eauto.
-  apply wt_regset_assign. auto. rewrite H8. 
-  eapply type_of_chunk_correct; eauto.
+  econstructor; eauto. eapply wt_exec_Iload; eauto.
   (* Istore *)
   econstructor; eauto.
   (* Icall *)
@@ -1072,8 +890,8 @@ Proof.
     exact wt_p. exact H0.
     discriminate.
   econstructor; eauto.
-  econstructor; eauto.
-  rewrite <- H7. apply wt_regset_list. auto.
+  econstructor; eauto. inv WTI; auto. 
+  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list. auto.
   (* Itailcall *)
   assert (wt_fundef fd).
     destruct ros; simpl in H0.
@@ -1084,32 +902,28 @@ Proof.
     exact wt_p. exact H0.
     discriminate.
   econstructor; eauto.
-  rewrite H6; auto.
-  rewrite <- H7. apply wt_regset_list. auto.
+  inv WTI. rewrite H7; auto. 
+  inv WTI. eapply Val.has_subtype_list; eauto. apply wt_regset_list. auto.
   (* Ibuiltin *)
-  econstructor; eauto.
-  apply wt_regset_assign. auto. 
-  rewrite H6. eapply external_call_well_typed; eauto. 
+  econstructor; eauto. eapply wt_exec_Ibuiltin; eauto.
   (* Icond *)
   econstructor; eauto.
   (* Ijumptable *)
   econstructor; eauto.
   (* Ireturn *)
   econstructor; eauto. 
-  destruct or; simpl in *.
-  rewrite <- H2. apply WT_RS. exact I.
+  inv WTI; simpl. auto. rewrite H2. eapply Val.has_subtype; eauto. 
   (* internal function *)
-  simpl in *. inv H5. inversion H1; subst.  
+  simpl in *. inv H5.
   econstructor; eauto.
-  apply wt_init_regs; auto. rewrite wt_params0; auto.
+  inv H1. apply wt_init_regs; auto. eapply Val.has_subtype_list; eauto. 
   (* external function *)
-  simpl in *. inv H5. 
-  econstructor; eauto. 
+  econstructor; eauto. simpl.  
   change (Val.has_type res (proj_sig_res (ef_sig ef))).
   eapply external_call_well_typed; eauto.
   (* return *)
-  inv H1. econstructor; eauto. 
-  apply wt_regset_assign; auto. congruence. 
+  inv H1. econstructor; eauto.
+  apply wt_regset_assign; auto. eapply Val.has_subtype; eauto. 
 Qed.
 
 End SUBJECT_REDUCTION.
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
index 179b2cc..f8cd561 100644
--- a/backend/Regalloc.ml
+++ b/backend/Regalloc.ml
@@ -481,7 +481,17 @@ let add_interfs_instr g instr live =
       add_interfs_list g ftmp srcs; add_interfs_list g ftmp dsts;
       (* Take into account destroyed reg when accessing Incoming param *)
       if List.exists (function (L(S(Incoming, _, _))) -> true | _ -> false) srcs
-      then add_interfs_list g (vmreg temp_for_parent_frame) dsts
+      then add_interfs_list g (vmreg temp_for_parent_frame) dsts;
+      (* Take into account destroyed reg when initializing Outgoing
+         arguments of type Tsingle *)
+      if List.exists
+           (function (L(S(Outgoing, _, Tsingle))) -> true | _ -> false) dsts
+      then
+        List.iter
+          (fun mr ->
+            add_interfs_list g (vmreg mr) srcs;
+            IRC.add_interf g (vmreg mr) ftmp)
+          (destroyed_by_setstack Tsingle)
   | Xop(op, args, res) ->
       begin match is_two_address op args with
       | None ->
@@ -825,7 +835,7 @@ let make_parmove srcs dsts itmp ftmp k =
   assert (Array.length dst = n);
   let status = Array.make n To_move in
   let temp_for =
-    function Tint -> itmp | Tfloat -> ftmp | Tlong -> assert false in
+    function Tint -> itmp | (Tfloat|Tsingle) -> ftmp | Tlong -> assert false in
   let code = ref [] in
   let add_move s d =
     match s, d with
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index 1808402..dfa81d8 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -413,7 +413,8 @@ Lemma gss_index_contains:
 Proof.
   intros. exploit gss_index_contains_base; eauto. intros [v' [A B]].
   assert (v' = v).
-    destruct v; destruct (type_of_index idx); simpl in *; intuition congruence.
+    destruct v; destruct (type_of_index idx); simpl in *;
+    try contradiction; auto. rewrite Floats.Float.singleoffloat_of_single in B; auto. 
   subst v'. auto.
 Qed.
 
@@ -513,6 +514,20 @@ Proof.
   intros. exploit gss_index_contains_base; eauto. intros [v'' [A B]].
   exists v''; split; auto.
   inv H2; destruct (type_of_index idx); simpl in *; try contradiction; subst; auto.
+  rewrite Floats.Float.singleoffloat_of_single; auto. 
+  econstructor; eauto.
+Qed.
+
+Lemma gss_index_contains_inj':
+  forall j idx m m' sp v v',
+  Mem.store (chunk_of_type (type_of_index idx)) m sp (offset_of_index fe idx) v' = Some m' ->
+  index_valid idx ->
+  val_inject j v v' ->
+  index_contains_inj j m' sp idx (Val.load_result (chunk_of_type (type_of_index idx)) v).
+Proof.
+  intros. exploit gss_index_contains_base; eauto. intros [v'' [A B]].
+  exists v''; split; auto.
+  inv H1; destruct (type_of_index idx); simpl in *; try contradiction; subst; auto. 
   econstructor; eauto.
 Qed.
 
@@ -783,7 +798,7 @@ Lemma agree_frame_set_reg:
 Proof.
   intros. inv H; constructor; auto; intros.
   rewrite Locmap.gso. auto. red. intuition congruence.
-  apply wt_setloc; auto.
+  apply wt_setreg; auto.
 Qed.
 
 Lemma agree_frame_set_regs:
@@ -838,17 +853,16 @@ Lemma agree_frame_set_local:
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   slot_within_bounds b Local ofs ty -> slot_valid f Local ofs ty = true ->
   val_inject j v v' ->
-  Val.has_type v ty ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_local ofs ty)) v' = Some m'' ->
   agree_frame j (Locmap.set (S Local ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H4.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_local ofs ty))) in H3.
   constructor; auto; intros.
 (* local *)
   unfold Locmap.set.
   destruct (Loc.eq (S Local ofs ty) (S Local ofs0 ty0)).
-  inv e. eapply gss_index_contains_inj; eauto with stacking.
+  inv e. eapply gss_index_contains_inj'; eauto with stacking.
   destruct (Loc.diff_dec (S Local ofs ty) (S Local ofs0 ty0)).
   eapply gso_index_contains_inj. eauto. eauto with stacking. eauto. 
   simpl. simpl in d. intuition.
@@ -870,7 +884,7 @@ Proof.
 (* perm *)
   red; intros. eapply Mem.perm_store_1; eauto.
 (* wt *)
-  apply wt_setloc; auto. 
+  apply wt_setstack; auto. 
 Qed.
 
 (** Preservation by assignment to outgoing slot *)
@@ -880,19 +894,18 @@ Lemma agree_frame_set_outgoing:
   agree_frame j ls ls0 m sp m' sp' parent retaddr ->
   slot_within_bounds b Outgoing ofs ty -> slot_valid f Outgoing ofs ty = true ->
   val_inject j v v' ->
-  Val.has_type v ty ->
   Mem.store (chunk_of_type ty) m' sp' (offset_of_index fe (FI_arg ofs ty)) v' = Some m'' ->
   agree_frame j (Locmap.set (S Outgoing ofs ty) v ls) ls0 m sp m'' sp' parent retaddr.
 Proof.
   intros. inv H. 
-  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H4.
+  change (chunk_of_type ty) with (chunk_of_type (type_of_index (FI_arg ofs ty))) in H3.
   constructor; auto; intros.
 (* local *)
   rewrite Locmap.gso. eapply gso_index_contains_inj; eauto with stacking. red; auto.
   red; left; congruence.
 (* outgoing *)
   unfold Locmap.set. destruct (Loc.eq (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
-  inv e. eapply gss_index_contains_inj; eauto with stacking.
+  inv e. eapply gss_index_contains_inj'; eauto with stacking.
   destruct (Loc.diff_dec (S Outgoing ofs ty) (S Outgoing ofs0 ty0)).
   eapply gso_index_contains_inj; eauto with stacking.
   red. red in d. intuition. 
@@ -911,7 +924,7 @@ Proof.
 (* perm *)
   red; intros. eapply Mem.perm_store_1; eauto.
 (* wt *)
-  apply wt_setloc; auto. 
+  apply wt_setstack; auto. 
 Qed.
 
 (** General invariance property with respect to memory changes. *)
@@ -1131,6 +1144,12 @@ Proof.
   apply check_mreg_list_incl; compute; auto.
 Qed.
 
+Remark destroyed_by_setstack_caller_save:
+  forall ty, incl (destroyed_by_setstack ty) destroyed_at_call.
+Proof.
+  destruct ty; apply check_mreg_list_incl; compute; auto.
+Qed.
+
 Remark destroyed_at_function_entry_caller_save:
   incl destroyed_at_function_entry destroyed_at_call.
 Proof.
@@ -1226,7 +1245,7 @@ Hypothesis csregs_typ:
   forall r, In r csregs -> mreg_type r = ty.
 
 Hypothesis ls_temp_undef:
-  forall r, In r (destroyed_by_op Omove) -> ls (R r) = Vundef.
+  forall r, In r (destroyed_by_setstack ty) -> ls (R r) = Vundef.
 
 Hypothesis wt_ls: wt_locset ls.
 
@@ -1269,10 +1288,10 @@ Proof.
   (* a store takes place *)
   exploit store_index_succeeds. apply (mkindex_valid a); auto with coqlib. 
   eauto. instantiate (1 := rs a). intros [m1 ST].
-  exploit (IHl k (undef_regs (destroyed_by_op Omove) rs) m1).  auto with coqlib. auto. 
+  exploit (IHl k (undef_regs (destroyed_by_setstack ty) rs) m1).  auto with coqlib. auto. 
   red; eauto with mem.
   apply agree_regs_exten with ls rs. auto.
-  intros. destruct (In_dec mreg_eq r (destroyed_by_op Omove)).
+  intros. destruct (In_dec mreg_eq r (destroyed_by_setstack ty)).
   left. apply ls_temp_undef; auto. 
   right; split. auto. apply undef_regs_other; auto.
   intros [rs' [m' [A [B [C [D [E F]]]]]]].
@@ -1370,7 +1389,8 @@ Proof.
   intros; congruence.
   intros; simpl. destruct idx; auto. congruence.
   intros. apply int_callee_save_type. auto.
-  auto.
+Local Transparent destroyed_by_setstack.
+  simpl. tauto.
   auto.
   apply incl_refl. 
   apply int_callee_save_norepet.
@@ -1388,8 +1408,8 @@ Proof.
   intros; congruence.
   intros; simpl. destruct idx; auto. congruence.
   intros. apply float_callee_save_type. auto.
+  simpl. tauto. 
   auto. 
-  auto.
   apply incl_refl. 
   apply float_callee_save_norepet.
   eexact E.
@@ -2491,7 +2511,7 @@ Proof.
   eapply index_contains_load_stack; eauto.
   econstructor; eauto with coqlib.
   apply agree_regs_set_reg; auto.
-  apply agree_frame_set_reg; auto. simpl. rewrite <- H. eapply agree_wt_ls; eauto.
+  apply agree_frame_set_reg; auto. simpl. eapply Val.has_subtype; eauto. eapply agree_wt_ls; eauto.
   (* Lgetstack, incoming *)
   unfold slot_valid in H0. InvBooleans.
   exploit incoming_slot_in_parameters; eauto. intros IN_ARGS.
@@ -2517,7 +2537,7 @@ Proof.
   eapply agree_frame_set_reg; eauto. eapply agree_frame_set_reg; eauto. 
   apply caller_save_reg_within_bounds. 
   apply temp_for_parent_frame_caller_save.
-  subst ty. simpl. eapply agree_wt_ls; eauto.
+  simpl. eapply Val.has_subtype; eauto. eapply agree_wt_ls; eauto.
   (* Lgetstack, outgoing *)
   exploit agree_outgoing; eauto. intros [v [A B]].
   econstructor; split.
@@ -2525,7 +2545,8 @@ Proof.
   eapply index_contains_load_stack; eauto.
   econstructor; eauto with coqlib.
   apply agree_regs_set_reg; auto.
-  apply agree_frame_set_reg; auto. simpl; rewrite <- H; eapply agree_wt_ls; eauto.
+  apply agree_frame_set_reg; auto.
+  simpl. eapply Val.has_subtype; eauto. eapply agree_wt_ls; eauto.
 
   (* Lsetstack *)
   set (idx := match sl with
@@ -2548,23 +2569,21 @@ Proof.
   econstructor. 
   eapply Mem.store_outside_inject; eauto. 
     intros. exploit agree_inj_unique; eauto. intros [EQ1 EQ2]; subst b' delta.
-    rewrite size_type_chunk in H5.
+    rewrite size_type_chunk in H4.
     exploit offset_of_index_disj_stack_data_2; eauto.
     exploit agree_bounds. eauto. apply Mem.perm_cur_max. eauto. 
     omega.
   apply match_stacks_change_mach_mem with m'; auto.
-  eauto with mem. eauto with mem. intros. rewrite <- H4; eapply Mem.load_store_other; eauto. left; unfold block; omega.
+  eauto with mem. eauto with mem. intros. rewrite <- H3; eapply Mem.load_store_other; eauto. left; unfold block; omega.
   eauto. eauto. auto. 
   apply agree_regs_set_slot. apply agree_regs_undef_regs; auto. 
   destruct sl.
     eapply agree_frame_set_local. eapply agree_frame_undef_locs; eauto.
-    apply destroyed_by_op_caller_save. auto. auto. apply AGREGS. 
-    rewrite H; eapply agree_wt_ls; eauto.
+    apply destroyed_by_setstack_caller_save. auto. auto. apply AGREGS. 
     assumption. 
     simpl in H0; discriminate.
     eapply agree_frame_set_outgoing. eapply agree_frame_undef_locs; eauto.
-    apply destroyed_by_op_caller_save. auto. auto. apply AGREGS. 
-    rewrite H; eapply agree_wt_ls; eauto.
+    apply destroyed_by_setstack_caller_save. auto. auto. apply AGREGS.
     assumption. 
   eauto with coqlib.
 
@@ -2572,12 +2591,14 @@ Proof.
   assert (Val.has_type v (mreg_type res)).
     destruct (is_move_operation op args) as [arg|] eqn:?.
     exploit is_move_operation_correct; eauto. intros [EQ1 EQ2]; subst op args. 
-    InvBooleans. simpl in H. inv H. rewrite <- H0. eapply agree_wt_ls; eauto. 
-    replace (mreg_type res) with (snd (type_of_operation op)).
+    eapply Val.has_subtype; eauto. simpl in H; inv H. eapply agree_wt_ls; eauto. 
+    destruct (type_of_operation op) as [targs tres] eqn:TYOP. 
+    eapply Val.has_subtype; eauto. 
+    replace tres with (snd (type_of_operation op)).
     eapply type_of_operation_sound; eauto.
     red; intros. subst op. simpl in Heqo.
     destruct args; simpl in H. discriminate. destruct args. discriminate. simpl in H. discriminate.
-    destruct (type_of_operation op) as [targs tres]. InvBooleans. auto. 
+    rewrite TYOP; auto. 
   assert (exists v',
           eval_operation ge (Vptr sp' Int.zero) (transl_op (make_env (function_bounds f)) op) rs0##args m' = Some v'
        /\ val_inject j v v').
@@ -2602,7 +2623,7 @@ Proof.
   eapply eval_addressing_inject; eauto. 
   eapply match_stacks_preserves_globals; eauto.
   eapply agree_inj; eauto. eapply agree_reglist; eauto.
-  destruct H2 as [a' [A B]].
+  destruct H1 as [a' [A B]].
   exploit Mem.loadv_inject; eauto. intros [v' [C D]].
   econstructor; split. 
   apply plus_one. econstructor. 
@@ -2612,7 +2633,7 @@ Proof.
   apply agree_regs_set_reg. rewrite transl_destroyed_by_load. apply agree_regs_undef_regs; auto. auto. 
   apply agree_frame_set_reg. apply agree_frame_undef_locs; auto.
   apply destroyed_by_load_caller_save. auto. 
-  simpl. rewrite H1. destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
+  simpl. eapply Val.has_subtype; eauto. destruct a; simpl in H0; try discriminate. eapply Mem.load_type; eauto.
 
   (* Lstore *)
   assert (exists a',
@@ -2698,7 +2719,7 @@ Proof.
   eapply agree_valid_mach; eauto.
   simpl. rewrite list_map_compose.
   change (fun x => Loc.type (R x)) with mreg_type.
-  rewrite H0. eapply external_call_well_typed'; eauto.
+  eapply Val.has_subtype_list; eauto. eapply external_call_well_typed'; eauto.
 
   (* Lannot *)
   exploit transl_annot_params_correct; eauto.
diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v
index a19a0e2..70a02c1 100644
--- a/cfrontend/Cexec.v
+++ b/cfrontend/Cexec.v
@@ -104,6 +104,7 @@ Definition eventval_of_val (v: val) (t: typ) : option eventval :=
   match v, t with
   | Vint i, AST.Tint => Some (EVint i)
   | Vfloat f, AST.Tfloat => Some (EVfloat f)
+  | Vfloat f, AST.Tsingle => if Float.is_single_dec f then Some (EVfloatsingle f) else None
   | Vlong n, AST.Tlong => Some (EVlong n)
   | Vptr b ofs, AST.Tint => do id <- Genv.invert_symbol ge b; Some (EVptr_global id ofs)
   | _, _ => None
@@ -123,6 +124,7 @@ Definition val_of_eventval (ev: eventval) (t: typ) : option val :=
   match ev, t with
   | EVint i, AST.Tint => Some (Vint i)
   | EVfloat f, AST.Tfloat => Some (Vfloat f)
+  | EVfloatsingle f, AST.Tsingle => if Float.is_single_dec f then Some (Vfloat f) else None
   | EVlong n, AST.Tlong => Some (Vlong n)
   | EVptr_global id ofs, AST.Tint => do b <- Genv.find_symbol ge id; Some (Vptr b ofs)
   | _, _ => None
@@ -135,6 +137,7 @@ Proof.
   constructor.
   constructor.
   constructor.
+  destruct (Float.is_single_dec f); inv H1. constructor; auto.
   destruct (Genv.invert_symbol ge b) as [id|] eqn:?; inv H1. 
   constructor. apply Genv.invert_find_symbol; auto.
 Qed.
@@ -143,6 +146,7 @@ Lemma eventval_of_val_complete:
   forall ev t v, eventval_match ge ev t v -> eventval_of_val v t = Some ev.
 Proof.
   induction 1; simpl; auto.
+  rewrite pred_dec_true; auto.
   rewrite (Genv.find_invert_symbol _ _ H). auto. 
 Qed.
 
@@ -170,6 +174,7 @@ Proof.
   constructor.
   constructor.
   constructor.
+  destruct (Float.is_single_dec f); inv H1. constructor; auto.
   destruct (Genv.find_symbol ge i) as [b|] eqn:?; inv H1.
   constructor. auto.
 Qed.
@@ -177,7 +182,7 @@ Qed.
 Lemma val_of_eventval_complete:
   forall ev t v, eventval_match ge ev t v -> val_of_eventval ev t = Some v.
 Proof.
-  induction 1; simpl; auto. rewrite H; auto.
+  induction 1; simpl; auto. rewrite pred_dec_true; auto. rewrite H; auto.
 Qed.
 
 (** Volatile memory accesses. *)
diff --git a/cfrontend/Ctypes.v b/cfrontend/Ctypes.v
index bf483a0..8d00b95 100644
--- a/cfrontend/Ctypes.v
+++ b/cfrontend/Ctypes.v
@@ -530,6 +530,7 @@ Fixpoint type_of_params (params: list (ident * type)) : typelist :=
 
 Definition typ_of_type (t: type) : AST.typ :=
   match t with
+  | Tfloat F32 _ => AST.Tsingle
   | Tfloat _ _ => AST.Tfloat
   | Tlong _ _ => AST.Tlong
   | _ => AST.Tint
@@ -538,6 +539,7 @@ Definition typ_of_type (t: type) : AST.typ :=
 Definition opttyp_of_type (t: type) : option AST.typ :=
   match t with
   | Tvoid => None
+  | Tfloat F32 _ => Some AST.Tsingle
   | Tfloat _ _ => Some AST.Tfloat
   | Tlong _ _ => Some AST.Tlong
   | _ => Some AST.Tint
diff --git a/common/AST.v b/common/AST.v
index fba5354..1b1e872 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -35,17 +35,19 @@ Definition ident_eq := peq.
 
 Parameter ident_of_string : String.string -> ident.
 
-(** The intermediate languages are weakly typed, using only three
+(** The intermediate languages are weakly typed, using only four
   types: [Tint] for 32-bit integers and pointers, [Tfloat] for 64-bit
-  floating-point numbers, [Tlong] for 64-bit integers. *)
+  floating-point numbers, [Tlong] for 64-bit integers, [Tsingle]
+  for 32-bit floating-point numbers. *)
 
 Inductive typ : Type :=
   | Tint
   | Tfloat
-  | Tlong.
+  | Tlong
+  | Tsingle.
 
 Definition typesize (ty: typ) : Z :=
-  match ty with Tint => 4 | Tfloat => 8 | Tlong => 8 end.
+  match ty with Tint => 4 | Tfloat => 8 | Tlong => 8 | Tsingle => 4 end.
 
 Lemma typesize_pos: forall ty, typesize ty > 0.
 Proof. destruct ty; simpl; omega. Qed.
@@ -60,6 +62,26 @@ Definition opt_typ_eq: forall (t1 t2: option typ), {t1=t2} + {t1<>t2}
 Definition list_typ_eq: forall (l1 l2: list typ), {l1=l2} + {l1<>l2}
                      := list_eq_dec typ_eq.
 
+(** All values of type [Tsingle] are also of type [Tfloat].  This
+  corresponds to the following subtyping relation over types. *)
+
+Definition subtype (ty1 ty2: typ) : bool :=
+  match ty1, ty2 with
+  | Tint, Tint => true
+  | Tlong, Tlong => true
+  | Tfloat, Tfloat => true
+  | Tsingle, Tsingle => true
+  | Tsingle, Tfloat => true
+  | _, _ => false
+  end.
+
+Fixpoint subtype_list (tyl1 tyl2: list typ) : bool :=
+  match tyl1, tyl2 with
+  | nil, nil => true
+  | ty1::tys1, ty2::tys2 => subtype ty1 ty2 && subtype_list tys1 tys2
+  | _, _ => false
+  end.
+
 (** Additionally, function definitions and function calls are annotated
   by function signatures indicating the number and types of arguments,
   as well as the type of the returned value if any.  These signatures
@@ -103,6 +125,19 @@ Global Opaque chunk_eq.
 (** The type (integer/pointer or float) of a chunk. *)
 
 Definition type_of_chunk (c: memory_chunk) : typ :=
+  match c with
+  | Mint8signed => Tint
+  | Mint8unsigned => Tint
+  | Mint16signed => Tint
+  | Mint16unsigned => Tint
+  | Mint32 => Tint
+  | Mint64 => Tlong
+  | Mfloat32 => Tsingle
+  | Mfloat64 => Tfloat
+  | Mfloat64al32 => Tfloat
+  end.
+
+Definition type_of_chunk_use (c: memory_chunk) : typ :=
   match c with
   | Mint8signed => Tint
   | Mint8unsigned => Tint
@@ -123,6 +158,7 @@ Definition chunk_of_type (ty: typ) :=
   | Tint => Mint32
   | Tfloat => Mfloat64al32
   | Tlong => Mint64
+  | Tsingle => Mfloat32
   end.
 
 (** Initialization data for global variables. *)
@@ -283,6 +319,25 @@ Proof.
   exploit IHl; eauto. intros [v' [P Q]]; exists v'; auto.
 Qed.
 
+Lemma transform_partial_program2_succeeds:
+  forall p tp i g,
+  transform_partial_program2 p = OK tp ->
+  In (i, g) p.(prog_defs) ->
+  match g with
+  | Gfun fd => exists tfd, transf_fun fd = OK tfd
+  | Gvar gv => exists tv, transf_var gv.(gvar_info) = OK tv
+  end.
+Proof.
+  intros. monadInv H. 
+  revert x EQ H0. induction (prog_defs p); simpl; intros.
+  contradiction.
+  destruct a as [id1 g1]. destruct g1.
+  destruct (transf_fun f) eqn:TF; try discriminate. monadInv EQ. 
+  destruct H0. inv H. econstructor; eauto. eapply IHl; eauto.
+  destruct (transf_globvar v) eqn:TV; try discriminate. monadInv EQ.
+  destruct H0. inv H. monadInv TV. econstructor; eauto. eapply IHl; eauto.
+Qed.
+
 Lemma transform_partial_program2_main:
   forall p tp,
   transform_partial_program2 p = OK tp ->
@@ -353,6 +408,16 @@ Proof.
   apply transform_partial_program2_function. 
 Qed.
 
+Lemma transform_partial_program_succeeds:
+  forall p tp i fd,
+  transform_partial_program p = OK tp ->
+  In (i, Gfun fd) p.(prog_defs) ->
+  exists tfd, transf_partial fd = OK tfd.
+Proof.
+  unfold transform_partial_program; intros. 
+  exploit transform_partial_program2_succeeds; eauto. 
+Qed.
+
 End TRANSF_PARTIAL_PROGRAM.
 
 Lemma transform_program_partial_program:
diff --git a/common/Events.v b/common/Events.v
index f342799..be1915a 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -57,6 +57,7 @@ Inductive eventval: Type :=
   | EVint: int -> eventval
   | EVlong: int64 -> eventval
   | EVfloat: float -> eventval
+  | EVfloatsingle: float -> eventval
   | EVptr_global: ident -> int -> eventval.
 
 Inductive event: Type :=
@@ -272,6 +273,9 @@ Inductive eventval_match: eventval -> typ -> val -> Prop :=
       eventval_match (EVlong i) Tlong (Vlong i)
   | ev_match_float: forall f,
       eventval_match (EVfloat f) Tfloat (Vfloat f)
+  | ev_match_single: forall f,
+      Float.is_single f ->
+      eventval_match (EVfloatsingle f) Tsingle (Vfloat f)
   | ev_match_ptr: forall id b ofs,
       Genv.find_symbol ge id = Some b ->
       eventval_match (EVptr_global id ofs) Tint (Vptr b ofs).
@@ -291,7 +295,7 @@ Lemma eventval_match_type:
   forall ev ty v,
   eventval_match ev ty v -> Val.has_type v ty.
 Proof.
-  intros. inv H; constructor.
+  intros. inv H; simpl; auto. 
 Qed.
 
 Lemma eventval_list_match_length:
@@ -330,7 +334,7 @@ Lemma eventval_match_inject:
   forall ev ty v1 v2,
   eventval_match ev ty v1 -> val_inject f v1 v2 -> eventval_match ev ty v2.
 Proof.
-  intros. inv H; inv H0; try constructor.
+  intros. inv H; inv H0; try constructor; auto.
   destruct glob_pres as [A [B C]].
   exploit A; eauto. intro EQ; rewrite H3 in EQ; inv EQ.
   rewrite Int.add_zero. econstructor; eauto. 
@@ -384,6 +388,7 @@ Definition eventval_valid (ev: eventval) : Prop :=
   | EVint _ => True
   | EVlong _ => True
   | EVfloat _ => True
+  | EVfloatsingle f => Float.is_single f
   | EVptr_global id ofs => exists b, Genv.find_symbol ge id = Some b
   end.
 
@@ -392,25 +397,37 @@ Definition eventval_type (ev: eventval) : typ :=
   | EVint _ => Tint
   | EVlong _ => Tlong
   | EVfloat _ => Tfloat
+  | EVfloatsingle _ => Tsingle
   | EVptr_global id ofs => Tint
   end.
 
-Lemma eventval_valid_match:
-  forall ev ty,
-  eventval_valid ev -> eventval_type ev = ty -> exists v, eventval_match ev ty v.
-Proof.
-  intros. subst ty. destruct ev; simpl in *.
+Lemma eventval_match_receptive:
+  forall ev1 ty v1 ev2,
+  eventval_match ev1 ty v1 ->
+  eventval_valid ev1 -> eventval_valid ev2 -> eventval_type ev1 = eventval_type ev2 ->
+  exists v2, eventval_match ev2 ty v2.
+Proof.
+  intros. inv H; destruct ev2; simpl in H2; try discriminate.
+  exists (Vint i0); constructor.
+  destruct H1 as [b EQ]. exists (Vptr b i1); constructor; auto.
+  exists (Vlong i0); constructor.
+  exists (Vfloat f1); constructor.
+  exists (Vfloat f1); constructor; auto.
   exists (Vint i); constructor.
-  exists (Vlong i); constructor.
-  exists (Vfloat f0); constructor.
-  destruct H as [b A]. exists (Vptr b i0); constructor; auto.
+  destruct H1 as [b' EQ]. exists (Vptr b' i0); constructor; auto.
 Qed.
 
 Lemma eventval_match_valid:
-  forall ev ty v,
-  eventval_match ev ty v -> eventval_valid ev /\ eventval_type ev = ty.
+  forall ev ty v, eventval_match ev ty v -> eventval_valid ev.
 Proof.
-  induction 1; simpl; auto. split; auto. exists b; auto.
+  destruct 1; simpl; auto. exists b; auto.
+Qed.
+
+Lemma eventval_match_same_type:
+  forall ev1 ty v1 ev2 v2,
+  eventval_match ev1 ty v1 -> eventval_match ev2 ty v2 -> eventval_type ev1 = eventval_type ev2.
+Proof.
+  destruct 1; intros EV; inv EV; auto. 
 Qed.
 
 End EVENTVAL.
@@ -430,7 +447,7 @@ Lemma eventval_match_preserved:
   forall ev ty v,
   eventval_match ge1 ev ty v -> eventval_match ge2 ev ty v.
 Proof.
-  induction 1; constructor. rewrite symbols_preserved; auto. 
+  induction 1; constructor; auto. rewrite symbols_preserved; auto. 
 Qed.
 
 Lemma eventval_list_match_preserved:
@@ -753,9 +770,8 @@ Lemma volatile_load_receptive:
   exists v2, volatile_load ge chunk m b ofs t2 v2.
 Proof.
   intros. inv H; inv H0.
-  exploit eventval_match_valid; eauto. intros [A B]. 
-  exploit eventval_valid_match. eexact H9. rewrite <- H10; eauto. 
-  intros [v' EVM]. exists (Val.load_result chunk v'). constructor; auto.
+  exploit eventval_match_receptive; eauto. intros [v' EM].
+  exists (Val.load_result chunk v'). constructor; auto.
   exists v1; constructor; auto.
 Qed.
 
@@ -766,8 +782,7 @@ Lemma volatile_load_ok:
 Proof.
   intros; constructor; intros.
 (* well typed *)
-  unfold proj_sig_res; simpl. inv H. inv H0.
-  destruct chunk; destruct v; simpl; constructor.
+  unfold proj_sig_res; simpl. inv H. inv H0. apply Val.load_result_type. 
   eapply Mem.load_type; eauto. 
 (* arity *)
   inv H; inv H0; auto.
@@ -796,15 +811,17 @@ Proof.
 (* determ *)
   inv H; inv H0. inv H1; inv H7; try congruence.
   assert (id = id0) by (eapply Genv.genv_vars_inj; eauto). subst id0.
-  exploit eventval_match_valid. eexact H2. intros [V1 T1].
-  exploit eventval_match_valid. eexact H4. intros [V2 T2].
-  split. constructor; auto. congruence.
+  split. constructor. 
+  eapply eventval_match_valid; eauto.
+  eapply eventval_match_valid; eauto.
+  eapply eventval_match_same_type; eauto.
   intros EQ; inv EQ. 
   assert (v = v0) by (eapply eventval_match_determ_1; eauto). subst v0.
   auto.
   split. constructor. intuition congruence.
 Qed.
 
+
 Inductive volatile_load_global_sem (chunk: memory_chunk) (id: ident) (ofs: int)
                                    (F V: Type) (ge: Genv.t F V):
               list val -> mem -> trace -> val -> mem -> Prop :=
@@ -830,8 +847,7 @@ Lemma volatile_load_global_ok:
 Proof.
   intros; constructor; intros.
 (* well typed *)
-  unfold proj_sig_res; simpl. inv H. inv H1.
-  destruct chunk; destruct v; simpl; constructor.
+  unfold proj_sig_res; simpl. inv H. inv H1. apply Val.load_result_type.
   eapply Mem.load_type; eauto. 
 (* arity *)
   inv H; inv H1; auto.
@@ -1450,15 +1466,15 @@ Proof.
   inv H; simpl; omega.
 (* receptive *)
   inv H; inv H0.
-  exploit eventval_match_valid; eauto. intros [A B]. 
-  exploit eventval_valid_match. eexact H7. rewrite <- H8; eauto. 
-  intros [v' EVM]. exists v'; exists m1. econstructor; eauto. 
+  exploit eventval_match_receptive; eauto. intros [v' EVM].
+  exists v'; exists m1. econstructor; eauto. 
 (* determ *)
   inv H; inv H0.
   assert (args = args0). eapply eventval_list_match_determ_2; eauto. subst args0.
-  exploit eventval_match_valid. eexact H2. intros [V1 T1].
-  exploit eventval_match_valid. eexact H3. intros [V2 T2].
-  split. constructor; auto. congruence.
+  split. constructor.
+  eapply eventval_match_valid; eauto.
+  eapply eventval_match_valid; eauto.
+  eapply eventval_match_same_type; eauto.
   intros EQ; inv EQ. split; auto. eapply eventval_match_determ_1; eauto.
 Qed.
 
diff --git a/common/Memdata.v b/common/Memdata.v
index c62ba99..47f8471 100644
--- a/common/Memdata.v
+++ b/common/Memdata.v
@@ -473,7 +473,7 @@ Lemma decode_val_type:
 Proof.
   intros. unfold decode_val. 
   destruct (proj_bytes cl). 
-  destruct chunk; simpl; auto. 
+  destruct chunk; simpl; auto. apply Float.single_of_bits_is_single.
   destruct chunk; simpl; auto.
   unfold proj_pointer. destruct cl; try (exact I).
   destruct m; try (exact I).
diff --git a/common/PrintAST.ml b/common/PrintAST.ml
index c18b09d..9768447 100644
--- a/common/PrintAST.ml
+++ b/common/PrintAST.ml
@@ -16,7 +16,11 @@ open Printf
 open Camlcoq
 open AST
 
-let name_of_type = function Tint -> "int" | Tfloat -> "float" | Tlong -> "long"
+let name_of_type = function
+  | Tint -> "int"
+  | Tfloat -> "float"
+  | Tlong -> "long"
+  | Tsingle -> "single"
 
 let name_of_chunk = function
   | Mint8signed -> "int8signed"
diff --git a/common/Subtyping.v b/common/Subtyping.v
new file mode 100644
index 0000000..e1bf61a
--- /dev/null
+++ b/common/Subtyping.v
@@ -0,0 +1,808 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* A solver for subtyping constraints. *)
+
+Require Import Recdef Coqlib Maps Errors.
+
+Local Open Scope nat_scope.
+Local Open Scope error_monad_scope.
+
+(** This module provides a solver for sets of subtyping constraints of the
+  following kinds: [base-type <: T(x)] or [T(x) <: base-type] or [T(x) <: T(y)].
+  The unknowns are the types [T(x)] of every identifier [x]. *)
+
+(** The interface for base types and the subtyping relation. *)
+
+Module Type TYPE_ALGEBRA.
+
+(** Type expressions *)
+
+Variable t: Type.
+Variable eq: forall (x y: t), {x=y} + {x<>y}.
+Variable default: t.
+
+(** Subtyping *)
+
+Variable sub: t -> t -> Prop.
+Hypothesis sub_refl: forall x, sub x x.
+Hypothesis sub_trans: forall x y z, sub x y -> sub y z -> sub x z.
+Variable sub_dec: forall x y, {sub x y} + {~sub x y}.
+
+(** Least upper bounds. [lub x y] is specified only when [x] and [y] have
+  a common supertype; it can be arbitrary otherwise. *)
+
+Variable lub: t -> t -> t.
+Hypothesis lub_left: forall x y z, sub x z -> sub y z -> sub x (lub x y).
+Hypothesis lub_right: forall x y z, sub x z -> sub y z -> sub y (lub x y).
+Hypothesis lub_min: forall x y z, sub x z -> sub y z -> sub (lub x y) z.
+
+(** Greater lower bounds. [glb x y] is specified only when [x] and [y] have
+  a common subtype; it can be arbitrary otherwise.*)
+
+Variable glb: t -> t -> t.
+Hypothesis glb_left: forall x y z, sub z x -> sub z y -> sub (glb x y) x.
+Hypothesis glb_right: forall x y z, sub z x -> sub z y -> sub (glb x y) y.
+Hypothesis glb_max: forall x y z, sub z x -> sub z y -> sub z (glb x y).
+
+(** Low and high bounds for a given type. *)
+Variable low_bound: t -> t.
+Variable high_bound: t -> t.
+Hypothesis low_bound_sub: forall t, sub (low_bound t) t.
+Hypothesis low_bound_minorant: forall x y, sub x y -> sub (low_bound y) x.
+Hypothesis high_bound_sub: forall t, sub t (high_bound t).
+Hypothesis high_bound_majorant: forall x y, sub x y -> sub y (high_bound x).
+
+(** Measure over types *)
+
+Variable weight: t -> nat.
+Variable max_weight: nat.
+Hypothesis weight_range: forall t, weight t <= max_weight.
+Hypothesis weight_sub: forall x y, sub x y -> weight x <= weight y.
+Hypothesis weight_sub_strict: forall x y, sub x y -> x <> y -> weight x < weight y.
+
+End TYPE_ALGEBRA.
+
+(** The constraint solver. *)
+
+Module SubSolver (T: TYPE_ALGEBRA).
+
+(* The current set of constraints is represented by a record with two components:
+- [te_typ]: a partial map from variables to pairs [tlo, thi]
+  of types, representing the low and high bounds for this variable.
+- [te_sub]: a list of pairs [(x,y)] of variables, indicating that
+  the type of [x] must be a subtype of the type of [y].
+*)
+
+Inductive bounds : Type := B (lo: T.t) (hi: T.t) (SUB: T.sub lo hi).
+
+Definition constraint : Type := (positive * positive)%type.
+
+Record typenv : Type := Typenv {
+  te_typ: PTree.t bounds;    (**r mapping var -> low & high bounds *)
+  te_sub: list constraint    (**r additional subtyping constraints *)
+}.
+
+Definition initial : typenv := {| te_typ := PTree.empty _; te_sub := nil |}.
+
+(** Add the constraint [ty <: T(x)]. *)
+
+Definition type_def (e: typenv) (x: positive) (ty: T.t) : res typenv :=
+  match e.(te_typ)!x with
+  | None =>
+      let b := B ty (T.high_bound ty) (T.high_bound_sub ty) in
+      OK {| te_typ := PTree.set x b e.(te_typ);
+            te_sub := e.(te_sub) |}
+  | Some(B lo hi s1) =>
+      match T.sub_dec ty hi with
+      | left s2 =>
+          let lo' := T.lub lo ty in
+          if T.eq lo lo' then OK e else
+            let b := B lo' hi (T.lub_min lo ty hi s1 s2) in
+            OK {| te_typ := PTree.set x b e.(te_typ);
+                  te_sub := e.(te_sub) |}
+      | right _ =>
+          Error (MSG "bad definition of variable " :: POS x :: nil)
+      end
+  end.
+
+Fixpoint type_defs (e: typenv) (rl: list positive) (tyl: list T.t) {struct rl}: res typenv :=
+  match rl, tyl with
+  | nil, nil => OK e
+  | r1::rs, ty1::tys => do e1 <- type_def e r1 ty1; type_defs e1 rs tys
+  | _, _ => Error (msg "arity mismatch")
+  end.
+
+(** Add the constraint [T(x) <: ty]. *)
+
+Definition type_use (e: typenv) (x: positive) (ty: T.t) : res typenv :=
+  match e.(te_typ)!x with
+  | None =>
+      let b := B (T.low_bound ty) ty (T.low_bound_sub ty) in
+      OK {| te_typ := PTree.set x b e.(te_typ);
+            te_sub := e.(te_sub) |}
+  | Some(B lo hi s1) =>
+      match T.sub_dec lo ty with
+      | left s2 =>
+          let hi' := T.glb hi ty in
+          if T.eq hi hi' then OK e else
+            let b := B lo hi' (T.glb_max hi ty lo s1 s2) in
+            OK {| te_typ := PTree.set x b e.(te_typ);
+                  te_sub := e.(te_sub) |}
+      | right _ =>
+          Error (MSG "bad use of variable " :: POS x :: nil)
+      end
+  end.
+
+Fixpoint type_uses (e: typenv) (rl: list positive) (tyl: list T.t) {struct rl}: res typenv :=
+  match rl, tyl with
+  | nil, nil => OK e
+  | r1::rs, ty1::tys => do e1 <- type_use e r1 ty1; type_uses e1 rs tys
+  | _, _ => Error (msg "arity mismatch")
+  end.
+
+(** Add the constraint [T(x) <: T(y)].
+    The boolean result is [true] if the types of [r1] and [r2] could be
+    made more precise.  Otherwise, [te_typ] does not change and
+    [false] is returned. *)
+
+Definition type_move (e: typenv) (r1 r2: positive) : res (bool * typenv) :=
+  if peq r1 r2 then OK (false, e) else
+  match e.(te_typ)!r1, e.(te_typ)!r2 with
+  | None, None =>
+      OK (false, {| te_typ := e.(te_typ); te_sub := (r1, r2) :: e.(te_sub) |})
+  | Some(B lo1 hi1 s1), None =>
+      let b2 := B lo1 (T.high_bound lo1) (T.high_bound_sub lo1) in
+      OK (true, {| te_typ := PTree.set r2 b2 e.(te_typ);
+                   te_sub := if T.sub_dec hi1 lo1 then e.(te_sub)
+                             else (r1, r2) :: e.(te_sub) |})
+  | None, Some(B lo2 hi2 s2) =>
+      let b1 := B (T.low_bound hi2) hi2 (T.low_bound_sub hi2) in
+      OK (true, {| te_typ := PTree.set r1 b1 e.(te_typ);
+                   te_sub := if T.sub_dec hi2 lo2 then e.(te_sub)
+                             else (r1, r2) :: e.(te_sub) |})
+  | Some(B lo1 hi1 s1), Some(B lo2 hi2 s2) =>
+      if T.sub_dec hi1 lo2 then
+        (* The constraint is always true, don't record it *)
+        OK (false, e)
+      else match T.sub_dec lo1 hi2 with
+      | left s =>
+          (* Tighten the bounds on [r1] and [r2] when possible and record
+             the constraint. *)
+          let lo2' := T.lub lo1 lo2 in
+          let hi1' := T.glb hi1 hi2 in
+          let b1 := B lo1 hi1' (T.glb_max hi1 hi2 lo1 s1 s) in
+          let b2 := B lo2' hi2 (T.lub_min lo1 lo2 hi2 s s2) in
+          if T.eq lo2 lo2' then
+            if T.eq hi1 hi1' then
+              OK (false, {| te_typ := e.(te_typ);
+                            te_sub := (r1, r2) :: e.(te_sub) |})
+            else
+              OK (true, {| te_typ := PTree.set r1 b1 e.(te_typ);
+                           te_sub := (r1, r2) :: e.(te_sub) |})
+          else
+            if T.eq hi1 hi1' then
+              OK (true, {| te_typ := PTree.set r2 b2 e.(te_typ);
+                           te_sub := (r1, r2) :: e.(te_sub) |})
+            else
+              OK (true, {| te_typ := PTree.set r2 b2 (PTree.set r1 b1 e.(te_typ));
+                           te_sub := (r1, r2) :: e.(te_sub) |})
+      | right _ =>
+          Error(MSG "ill-typed move from " :: POS r1 :: MSG " to " :: POS r2 :: nil)
+      end
+  end.
+
+(** Solve the remaining subtyping constraints by iteration. *)
+
+Fixpoint solve_rec (e: typenv) (changed: bool) (q: list constraint) : res (typenv * bool) :=
+  match q with 
+  | nil =>
+      OK (e, changed)
+  | (r1, r2) :: q' =>
+      do (changed1, e1) <- type_move e r1 r2; solve_rec e1 (changed || changed1) q'
+  end.
+
+(** Measuring the state *)
+
+Definition weight_bounds (ob: option bounds) : nat :=
+  match ob with None => T.max_weight + 1 | Some(B lo hi s) => T.weight hi - T.weight lo end.
+
+Lemma weight_bounds_1:
+  forall lo hi s, weight_bounds (Some (B lo hi s)) < weight_bounds None.
+Proof.
+  intros; simpl. generalize (T.weight_range hi); omega. 
+Qed.
+
+Lemma weight_bounds_2:
+  forall lo1 hi1 s1 lo2 hi2 s2,
+  T.sub lo2 lo1 -> T.sub hi1 hi2 -> lo1 <> lo2 \/ hi1 <> hi2 ->
+  weight_bounds (Some (B lo1 hi1 s1)) < weight_bounds (Some (B lo2 hi2 s2)).
+Proof.
+  intros; simpl. 
+  generalize (T.weight_sub _ _ s1) (T.weight_sub _ _ s2) (T.weight_sub _ _ H) (T.weight_sub _ _ H0); intros.
+  destruct H1.
+  assert (T.weight lo2 < T.weight lo1) by (apply T.weight_sub_strict; auto). omega.
+  assert (T.weight hi1 < T.weight hi2) by (apply T.weight_sub_strict; auto). omega.
+Qed.
+
+Hint Resolve T.sub_refl: ty.
+
+Lemma weight_type_move:
+  forall e r1 r2 changed e',
+  type_move e r1 r2 = OK(changed, e') ->
+  (e'.(te_sub) = e.(te_sub) \/ e'.(te_sub) = (r1, r2) :: e.(te_sub))
+  /\ (forall r, weight_bounds e'.(te_typ)!r <= weight_bounds e.(te_typ)!r)
+  /\ (changed = true ->
+        weight_bounds e'.(te_typ)!r1 + weight_bounds e'.(te_typ)!r2 
+        < weight_bounds e.(te_typ)!r1 + weight_bounds e.(te_typ)!r2).
+Proof.
+  unfold type_move; intros.
+  destruct (peq r1 r2).
+  inv H. split; auto. split; intros. omega. discriminate.
+  destruct (te_typ e)!r1 as [[lo1 hi1 s1]|] eqn:E1;
+  destruct (te_typ e)!r2 as [[lo2 hi2 s2]|] eqn:E2.
+- destruct (T.sub_dec hi1 lo2).
+  inv H. split; auto. split; intros. omega. discriminate.
+  destruct (T.sub_dec lo1 hi2); try discriminate.
+  set (lo2' := T.lub lo1 lo2) in *.
+  set (hi1' := T.glb hi1 hi2) in *.
+  assert (S1': T.sub hi1' hi1) by (eapply T.glb_left; eauto).
+  assert (S2': T.sub lo2 lo2') by (eapply T.lub_right; eauto).
+  set (b1 := B lo1 hi1' (T.glb_max hi1 hi2 lo1 s1 s)) in *.
+  set (b2 := B lo2' hi2 (T.lub_min lo1 lo2 hi2 s s2)) in *.
+Local Opaque weight_bounds.
+  destruct (T.eq lo2 lo2'); destruct (T.eq hi1 hi1'); inversion H; clear H; subst changed e'; simpl.
++ split; auto. split; intros. omega. discriminate.
++ assert (weight_bounds (Some b1) < weight_bounds (Some (B lo1 hi1 s1)))
+  by (apply weight_bounds_2; auto with ty).
+  split; auto. split; intros. 
+  rewrite PTree.gsspec. destruct (peq r r1). subst r. rewrite E1. omega. omega.
+  rewrite PTree.gss. rewrite PTree.gso by auto. rewrite E2. omega. 
++ assert (weight_bounds (Some b2) < weight_bounds (Some (B lo2 hi2 s2)))
+  by (apply weight_bounds_2; auto with ty).
+  split; auto. split; intros. 
+  rewrite PTree.gsspec. destruct (peq r r2). subst r. rewrite E2. omega. omega.
+  rewrite PTree.gss. rewrite PTree.gso by auto. rewrite E1. omega.
++ assert (weight_bounds (Some b1) < weight_bounds (Some (B lo1 hi1 s1)))
+  by (apply weight_bounds_2; auto with ty).
+  assert (weight_bounds (Some b2) < weight_bounds (Some (B lo2 hi2 s2)))
+  by (apply weight_bounds_2; auto with ty).
+  split; auto. split; intros. 
+  rewrite ! PTree.gsspec.
+  destruct (peq r r2). subst r. rewrite E2. omega.
+  destruct (peq r r1). subst r. rewrite E1. omega. 
+  omega.
+  rewrite PTree.gss. rewrite PTree.gso by auto. rewrite PTree.gss. omega.
+
+- set (b2 := B lo1 (T.high_bound lo1) (T.high_bound_sub lo1)) in *.
+  assert (weight_bounds (Some b2) < weight_bounds None) by (apply weight_bounds_1).
+  inv H; simpl.
+  split. destruct (T.sub_dec hi1 lo1); auto.
+  split; intros. 
+  rewrite PTree.gsspec. destruct (peq r r2). subst r; rewrite E2; omega. omega.
+  rewrite PTree.gss. rewrite PTree.gso by auto. rewrite E1. omega. 
+
+- set (b1 := B (T.low_bound hi2) hi2 (T.low_bound_sub hi2)) in *.
+  assert (weight_bounds (Some b1) < weight_bounds None) by (apply weight_bounds_1).
+  inv H; simpl.
+  split. destruct (T.sub_dec hi2 lo2); auto.
+  split; intros. 
+  rewrite PTree.gsspec. destruct (peq r r1). subst r; rewrite E1; omega. omega.
+  rewrite PTree.gss. rewrite PTree.gso by auto. rewrite E2. omega.
+
+- inv H. split; auto. simpl; split; intros. omega. congruence.
+Qed.
+
+Definition weight_constraints (b: PTree.t bounds) (cstr: list constraint) : nat :=
+  List.fold_right (fun xy n => n + weight_bounds b!(fst xy) + weight_bounds b!(snd xy)) 0 cstr.
+
+Remark weight_constraints_tighter:
+  forall b1 b2, (forall r, weight_bounds b1!r <= weight_bounds b2!r) ->
+  forall q, weight_constraints b1 q <= weight_constraints b2 q.
+Proof.
+  induction q; simpl. omega. generalize (H (fst a)) (H (snd a)); omega.
+Qed.
+
+Lemma weight_solve_rec:
+  forall q e changed e' changed',
+  solve_rec e changed q = OK(e', changed') ->
+  (forall r, weight_bounds e'.(te_typ)!r <= weight_bounds e.(te_typ)!r) /\
+  weight_constraints e'.(te_typ) e'.(te_sub) + (if changed' && negb changed then 1 else 0)
+     <= weight_constraints e.(te_typ) e.(te_sub) + weight_constraints e.(te_typ) q.
+Proof.
+  induction q; simpl; intros.
+- inv H. split. intros; omega. replace (changed' && negb changed') with false. 
+  omega. destruct changed'; auto.
+- destruct a as [r1 r2]; monadInv H; simpl.
+  rename x into changed1. rename x0 into e1.
+  exploit weight_type_move; eauto. intros [A [B C]].
+  exploit IHq; eauto. intros [D E].
+  split. intros. eapply le_trans. eapply D. eapply B.
+  assert (P: weight_constraints (te_typ e1) (te_sub e) <=
+             weight_constraints (te_typ e) (te_sub e))
+  by (apply weight_constraints_tighter; auto).
+  assert (Q: weight_constraints (te_typ e1) (te_sub e1) <=
+             weight_constraints (te_typ e1) (te_sub e) + 
+             weight_bounds (te_typ e1)!r1 + weight_bounds (te_typ e1)!r2).
+  { destruct A as [Q|Q]; rewrite Q. omega. simpl. omega. }
+  assert (R: weight_constraints (te_typ e1) q <= weight_constraints (te_typ e) q)
+  by (apply weight_constraints_tighter; auto).
+  set (ch1 := if changed' && negb (changed || changed1) then 1 else 0) in *.
+  set (ch2 := if changed' && negb changed then 1 else 0) in *.
+  destruct changed1.
+  assert (ch2 <= ch1 + 1). 
+  { unfold ch2, ch1. rewrite orb_true_r. simpl. rewrite andb_false_r. 
+    destruct (changed' && negb changed); omega. }
+  exploit C; eauto. omega.
+  assert (ch2 <= ch1).
+  { unfold ch2, ch1. rewrite orb_false_r. omega. }
+  generalize (B r1) (B r2); omega.  
+Qed.
+
+Definition weight_typenv (e: typenv) : nat := 
+  weight_constraints e.(te_typ) e.(te_sub).
+
+(** Iterative solving of the remaining constraints *)
+
+Function solve_constraints (e: typenv) {measure weight_typenv e}: res typenv :=
+  match solve_rec {| te_typ := e.(te_typ); te_sub := nil |} false e.(te_sub) with
+  | OK(e', false) => OK e                   (**r no more changes, fixpoint reached *)
+  | OK(e', true)  => solve_constraints e'   (**r one more iteration *)
+  | Error msg => Error msg
+  end.
+Proof.
+  intros. exploit weight_solve_rec; eauto. simpl. intros [A B].
+  unfold weight_typenv. omega. 
+Qed.
+
+Definition typassign := positive -> T.t.
+
+Definition makeassign (e: typenv) : typassign :=
+  fun x => match e.(te_typ)!x with Some(B lo hi s) => lo | None => T.default end.
+
+Definition solve (e: typenv) : res typassign :=
+  do e' <- solve_constraints e; OK(makeassign e').
+
+(** What it means to be a solution *)
+
+Definition satisf (te: typassign) (e: typenv) : Prop :=
+   (forall x lo hi s, e.(te_typ)!x = Some(B lo hi s) -> T.sub lo (te x) /\ T.sub (te x) hi)
+/\ (forall x y, In (x, y) e.(te_sub) -> T.sub (te x) (te y)).
+
+Lemma satisf_initial: forall te, satisf te initial.
+Proof.
+  unfold initial; intros; split; simpl; intros. 
+  rewrite PTree.gempty in H; discriminate.
+  contradiction.
+Qed.
+
+(** Soundness proof *)
+
+Lemma type_def_incr:
+  forall te x ty e e', type_def e x ty = OK e' -> satisf te e' -> satisf te e.
+Proof.
+  unfold type_def; intros. destruct (te_typ e)!x as [[lo hi s1]|] eqn:E.
+- destruct (T.sub_dec ty hi); try discriminate.
+  destruct (T.eq lo (T.lub lo ty)); monadInv H.
+  subst e'; auto.
+  destruct H0 as [P Q]; split; auto; intros.
+  destruct (peq x x0). 
+  + subst x0. rewrite E in H; inv H.
+    exploit (P x); simpl. rewrite PTree.gss; eauto. intuition. 
+    apply T.sub_trans with (T.lub lo0 ty); auto. eapply T.lub_left; eauto. 
+  + eapply P; simpl. rewrite PTree.gso; eauto. 
+- inv H. destruct H0 as [P Q]; split; auto; intros.
+  eapply P; simpl. rewrite PTree.gso; eauto. congruence.
+Qed.
+
+Hint Resolve type_def_incr: ty.
+
+Lemma type_def_sound:
+  forall te x ty e e', type_def e x ty = OK e' -> satisf te e' -> T.sub ty (te x).
+Proof.
+  unfold type_def; intros. destruct H0 as [P Q].
+  destruct (te_typ e)!x as [[lo hi s1]|] eqn:E.
+- destruct (T.sub_dec ty hi); try discriminate.
+  destruct (T.eq lo (T.lub lo ty)); monadInv H.
+  + subst e'. apply T.sub_trans with lo. 
+    rewrite e0. eapply T.lub_right; eauto. eapply P; eauto.
+  + apply T.sub_trans with (T.lub lo ty). 
+    eapply T.lub_right; eauto. 
+    eapply (P x). simpl. rewrite PTree.gss; eauto. 
+- inv H. eapply (P x); simpl. rewrite PTree.gss; eauto. 
+Qed.
+
+Lemma type_defs_incr:
+  forall te xl tyl e e', type_defs e xl tyl = OK e' -> satisf te e' -> satisf te e.
+Proof.
+  induction xl; destruct tyl; simpl; intros; monadInv H; eauto with ty.
+Qed.
+
+Hint Resolve type_defs_incr: ty.
+
+Lemma type_defs_sound:
+  forall te xl tyl e e', type_defs e xl tyl = OK e' -> satisf te e' -> list_forall2 T.sub tyl (map te xl).
+Proof.
+  induction xl; destruct tyl; simpl; intros; monadInv H.
+  constructor.
+  constructor; eauto. eapply type_def_sound; eauto with ty.
+Qed.
+
+Lemma type_use_incr:
+  forall te x ty e e', type_use e x ty = OK e' -> satisf te e' -> satisf te e.
+Proof.
+  unfold type_use; intros. destruct (te_typ e)!x as [[lo hi s1]|] eqn:E.
+- destruct (T.sub_dec lo ty); try discriminate.
+  destruct (T.eq hi (T.glb hi ty)); monadInv H.
+  subst e'; auto.
+  destruct H0 as [P Q]; split; auto; intros.
+  destruct (peq x x0). 
+  + subst x0. rewrite E in H; inv H.
+    exploit (P x); simpl. rewrite PTree.gss; eauto. intuition. 
+    apply T.sub_trans with (T.glb hi0 ty); auto. eapply T.glb_left; eauto. 
+  + eapply P; simpl. rewrite PTree.gso; eauto. 
+- inv H. destruct H0 as [P Q]; split; auto; intros.
+  eapply P; simpl. rewrite PTree.gso; eauto. congruence.
+Qed.
+
+Hint Resolve type_use_incr: ty.
+
+Lemma type_use_sound:
+  forall te x ty e e', type_use e x ty = OK e' -> satisf te e' -> T.sub (te x) ty.
+Proof.
+  unfold type_use; intros. destruct H0 as [P Q].
+  destruct (te_typ e)!x as [[lo hi s1]|] eqn:E.
+- destruct (T.sub_dec lo ty); try discriminate.
+  destruct (T.eq hi (T.glb hi ty)); monadInv H.
+  + subst e'. apply T.sub_trans with hi. 
+    eapply P; eauto. rewrite e0. eapply T.glb_right; eauto. 
+  + apply T.sub_trans with (T.glb hi ty). 
+    eapply (P x). simpl. rewrite PTree.gss; eauto. 
+    eapply T.glb_right; eauto. 
+- inv H. eapply (P x); simpl. rewrite PTree.gss; eauto. 
+Qed.
+
+Lemma type_uses_incr:
+  forall te xl tyl e e', type_uses e xl tyl = OK e' -> satisf te e' -> satisf te e.
+Proof.
+  induction xl; destruct tyl; simpl; intros; monadInv H; eauto with ty.
+Qed.
+
+Hint Resolve type_uses_incr: ty.
+
+Lemma type_uses_sound:
+  forall te xl tyl e e', type_uses e xl tyl = OK e' -> satisf te e' -> list_forall2 T.sub (map te xl) tyl.
+Proof.
+  induction xl; destruct tyl; simpl; intros; monadInv H.
+  constructor.
+  constructor; eauto. eapply type_use_sound; eauto with ty.
+Qed.
+
+Lemma type_move_incr:
+  forall te e r1 r2 e' changed,
+  type_move e r1 r2 = OK(changed, e') -> satisf te e' -> satisf te e.
+Proof.
+  unfold type_move; intros. destruct H0 as [P Q].
+  destruct (peq r1 r2). inv H; split; auto.
+  destruct (te_typ e)!r1 as [[lo1 hi1 s1]|] eqn:E1;
+  destruct (te_typ e)!r2 as [[lo2 hi2 s2]|] eqn:E2.
+- destruct (T.sub_dec hi1 lo2). inv H; split; auto.
+  destruct (T.sub_dec lo1 hi2); try discriminate.
+  set (lo := T.lub lo1 lo2) in *. set (hi := T.glb hi1 hi2) in *.
+  destruct (T.eq lo2 lo); destruct (T.eq hi1 hi); monadInv H; simpl in *.
+  + subst e'; simpl in *. split; auto. 
+  + subst e'; simpl in *. split; auto. intros. destruct (peq x r1).
+    subst x. 
+    rewrite E1 in H. injection H; intros; subst lo0 hi0. 
+    exploit (P r1). rewrite PTree.gss; eauto. intuition. 
+    apply T.sub_trans with (T.glb hi1 hi2); auto. eapply T.glb_left; eauto.
+    eapply P. rewrite PTree.gso; eauto.
+  + subst e'; simpl in *. split; auto. intros. destruct (peq x r2).
+    subst x. 
+    rewrite E2 in H. injection H; intros; subst lo0 hi0. 
+    exploit (P r2). rewrite PTree.gss; eauto. intuition. 
+    apply T.sub_trans with (T.lub lo1 lo2); auto. eapply T.lub_right; eauto.
+    eapply P. rewrite PTree.gso; eauto.
+  + split; auto. intros. 
+    destruct (peq x r1). subst x. 
+    rewrite E1 in H. injection H; intros; subst lo0 hi0. 
+    exploit (P r1). rewrite PTree.gso; eauto. rewrite PTree.gss; eauto. intuition. 
+    apply T.sub_trans with (T.glb hi1 hi2); auto. eapply T.glb_left; eauto.
+    destruct (peq x r2). subst x. 
+    rewrite E2 in H. injection H; intros; subst lo0 hi0. 
+    exploit (P r2). rewrite PTree.gss; eauto. intuition. 
+    apply T.sub_trans with (T.lub lo1 lo2); auto. eapply T.lub_right; eauto.
+    eapply P. rewrite ! PTree.gso; eauto.
+- inv H; simpl in *. split; intros.
+  eapply P. rewrite PTree.gso; eauto. congruence.
+  apply Q.   destruct (T.sub_dec hi1 lo1); auto with coqlib.
+- inv H; simpl in *. split; intros.
+  eapply P. rewrite PTree.gso; eauto. congruence.
+  apply Q.   destruct (T.sub_dec hi2 lo2); auto with coqlib.
+- inv H; simpl in *. split; auto. 
+Qed.
+
+Hint Resolve type_move_incr: ty.
+
+Lemma type_move_sound:
+  forall te e r1 r2 e' changed,
+  type_move e r1 r2 = OK(changed, e') -> satisf te e' -> T.sub (te r1) (te r2).
+Proof.
+  unfold type_move; intros. destruct H0 as [P Q].
+  destruct (peq r1 r2). subst r2. apply T.sub_refl.
+  destruct (te_typ e)!r1 as [[lo1 hi1 s1]|] eqn:E1;
+  destruct (te_typ e)!r2 as [[lo2 hi2 s2]|] eqn:E2.
+- destruct (T.sub_dec hi1 lo2). 
+  inv H. apply T.sub_trans with hi1. eapply P; eauto. apply T.sub_trans with lo2; auto. eapply P; eauto.
+  destruct (T.sub_dec lo1 hi2); try discriminate.
+  set (lo := T.lub lo1 lo2) in *. set (hi := T.glb hi1 hi2) in *.
+  destruct (T.eq lo2 lo); destruct (T.eq hi1 hi); monadInv H; simpl in *.
+  + subst e'; simpl in *. apply Q; auto.
+  + subst e'; simpl in *. apply Q; auto. 
+  + subst e'; simpl in *. apply Q; auto. 
+  + apply Q; auto.
+- inv H; simpl in *. destruct (T.sub_dec hi1 lo1).
+  + apply T.sub_trans with hi1. eapply P; eauto. rewrite PTree.gso; eauto. 
+    apply T.sub_trans with lo1; auto. eapply P. rewrite PTree.gss; eauto. 
+  + auto with coqlib.
+- inv H; simpl in *. destruct (T.sub_dec hi2 lo2).
+  + apply T.sub_trans with hi2. eapply P. rewrite PTree.gss; eauto. 
+    apply T.sub_trans with lo2; auto. eapply P. rewrite PTree.gso; eauto. 
+  + auto with coqlib.
+- inv H. simpl in Q; auto. 
+Qed.
+
+Lemma solve_rec_incr:
+  forall te q e changed e' changed',
+  solve_rec e changed q = OK(e', changed') -> satisf te e' -> satisf te e.
+Proof.
+  induction q; simpl; intros.
+- inv H. auto.
+- destruct a as [r1 r2]; monadInv H. eauto with ty.
+Qed.
+
+Lemma solve_rec_sound:
+  forall te r1 r2 q e changed e' changed',
+  solve_rec e changed q = OK(e', changed') -> In (r1, r2) q -> satisf te e' -> 
+  T.sub (te r1) (te r2).
+Proof.
+  induction q; simpl; intros.
+- contradiction.
+- destruct a as [r3 r4]; monadInv H. destruct H0. 
+  + inv H. eapply type_move_sound; eauto. eapply solve_rec_incr; eauto.
+  + eapply IHq; eauto with ty.
+Qed.
+
+Lemma type_move_false:
+  forall e r1 r2 e',
+  type_move e r1 r2 = OK(false, e') ->
+  te_typ e' = te_typ e /\ T.sub (makeassign e r1) (makeassign e r2).
+Proof.
+  unfold type_move; intros. 
+  destruct (peq r1 r2). inv H. split; auto. apply T.sub_refl.
+  unfold makeassign;
+  destruct (te_typ e)!r1 as [[lo1 hi1 s1]|] eqn:E1;
+  destruct (te_typ e)!r2 as [[lo2 hi2 s2]|] eqn:E2.
+- destruct (T.sub_dec hi1 lo2). 
+  inv H. split; auto. eapply T.sub_trans; eauto.
+  destruct (T.sub_dec lo1 hi2); try discriminate.
+  set (lo := T.lub lo1 lo2) in *. set (hi := T.glb hi1 hi2) in *.
+  destruct (T.eq lo2 lo); destruct (T.eq hi1 hi); try discriminate.
+  monadInv H. split; auto. rewrite e0. unfold lo. eapply T.lub_left; eauto.
+- discriminate.
+- discriminate.
+- inv H. split; auto. apply T.sub_refl.
+Qed.
+
+Lemma solve_rec_false:
+  forall r1 r2 q e changed e',
+  solve_rec e changed q = OK(e', false) ->
+  changed = false /\ 
+  (In (r1, r2) q -> T.sub (makeassign e r1) (makeassign e r2)).
+Proof.
+  induction q; simpl; intros.
+- inv H. tauto. 
+- destruct a as [r3 r4]; monadInv H.
+  exploit IHq; eauto. intros [P Q].
+  destruct changed; try discriminate. destruct x; try discriminate.
+  exploit type_move_false; eauto. intros [U V].
+  split. auto. intros [A|A]. inv A. auto. exploit Q; auto. 
+  unfold makeassign; rewrite U; auto.    
+Qed.
+
+Lemma solve_constraints_incr:
+  forall te e e', solve_constraints e = OK e' -> satisf te e' -> satisf te e.
+Proof.
+  intros te e; functional induction (solve_constraints e); intros.
+- inv H. auto.
+- exploit solve_rec_incr; eauto. intros [A B].
+  split; auto. intros; eapply solve_rec_sound; eauto.
+- discriminate.
+Qed.
+
+Lemma solve_constraints_sound:
+  forall e e', solve_constraints e = OK e' -> satisf (makeassign e') e'.
+Proof.
+  intros e0; functional induction (solve_constraints e0); intros.
+- inv H. split; intros.
+  unfold makeassign; rewrite H. split; auto with ty.
+  exploit solve_rec_false. eauto. intros [A B]. eapply B; eauto. 
+- eauto.
+- discriminate.
+Qed.
+
+Theorem solve_sound:
+  forall e te, solve e = OK te -> satisf te e.
+Proof.
+  unfold solve; intros. monadInv H. 
+  eapply solve_constraints_incr. eauto. eapply solve_constraints_sound; eauto.
+Qed.
+
+(** Completeness proof *)
+
+Lemma type_def_complete:
+  forall te e x ty,
+  satisf te e -> T.sub ty (te x) -> exists e', type_def e x ty = OK e' /\ satisf te e'.
+Proof.
+  unfold type_def; intros. destruct H as [P Q].
+  destruct (te_typ e)!x as [[lo hi s1]|] eqn:E. 
+- destruct (T.sub_dec ty hi). 
+  destruct (T.eq lo (T.lub lo ty)).
+  exists e; split; auto. split; auto.
+  econstructor; split; eauto. split; simpl; auto; intros. 
+  rewrite PTree.gsspec in H. destruct (peq x0 x). 
+  inv H. exploit P; eauto. intuition. eapply T.lub_min; eauto. 
+  eapply P; eauto.
+  elim n. apply T.sub_trans with (te x); auto. eapply P; eauto.
+- econstructor; split; eauto. split; simpl; auto; intros.
+  rewrite PTree.gsspec in H. destruct (peq x0 x). 
+  inv H. split; auto. apply T.high_bound_majorant; auto.
+  eapply P; eauto.
+Qed.
+
+Lemma type_defs_complete:
+  forall te xl tyl e,
+  satisf te e -> list_forall2 T.sub tyl (map te xl) ->
+  exists e', type_defs e xl tyl = OK e' /\ satisf te e'.
+Proof.
+  induction xl; intros; inv H0; simpl.
+  econstructor; eauto.
+  exploit (type_def_complete te e a a1); auto. intros (e1 & P & Q).
+  exploit (IHxl al e1); auto. intros (e2 & U & V).
+  exists e2; split; auto. rewrite P; auto.
+Qed.
+
+Lemma type_use_complete:
+  forall te e x ty,
+  satisf te e -> T.sub (te x) ty -> exists e', type_use e x ty = OK e' /\ satisf te e'.
+Proof.
+  unfold type_use; intros. destruct H as [P Q].
+  destruct (te_typ e)!x as [[lo hi s1]|] eqn:E. 
+- destruct (T.sub_dec lo ty). 
+  destruct (T.eq hi (T.glb hi ty)).
+  exists e; split; auto. split; auto.
+  econstructor; split; eauto. split; simpl; auto; intros. 
+  rewrite PTree.gsspec in H. destruct (peq x0 x). 
+  inv H. exploit P; eauto. intuition. eapply T.glb_max; eauto. 
+  eapply P; eauto.
+  elim n. apply T.sub_trans with (te x); auto. eapply P; eauto.
+- econstructor; split; eauto. split; simpl; auto; intros.
+  rewrite PTree.gsspec in H. destruct (peq x0 x). 
+  inv H. split; auto. apply T.low_bound_minorant; auto.
+  eapply P; eauto.
+Qed.
+
+Lemma type_uses_complete:
+  forall te xl tyl e,
+  satisf te e -> list_forall2 T.sub (map te xl) tyl ->
+  exists e', type_uses e xl tyl = OK e' /\ satisf te e'.
+Proof.
+  induction xl; intros; inv H0; simpl.
+  econstructor; eauto.
+  exploit (type_use_complete te e a b1); auto. intros (e1 & P & Q).
+  exploit (IHxl bl e1); auto. intros (e2 & U & V).
+  exists e2; split; auto. rewrite P; auto.
+Qed.
+
+Lemma type_move_complete:
+  forall te e r1 r2,
+  satisf te e -> T.sub (te r1) (te r2) ->
+  exists changed e', type_move e r1 r2 = OK(changed, e') /\ satisf te e'.
+Proof.
+  unfold type_move; intros. elim H; intros P Q.
+  assert (Q': forall x y, In (x, y) ((r1, r2) :: te_sub e) -> T.sub (te x) (te y)).
+  { intros. destruct H1; auto. congruence. }
+  destruct (peq r1 r2). econstructor; econstructor; eauto.
+  destruct (te_typ e)!r1 as [[lo1 hi1 s1]|] eqn:E1;
+  destruct (te_typ e)!r2 as [[lo2 hi2 s2]|] eqn:E2.
+- exploit (P r1); eauto. intros [L1 U1].
+  exploit (P r2); eauto. intros [L2 U2].
+  destruct (T.sub_dec hi1 lo2). econstructor; econstructor; eauto.
+  destruct (T.sub_dec lo1 hi2). 
+  destruct (T.eq lo2 (T.lub lo1 lo2)); destruct (T.eq hi1 (T.glb hi1 hi2));
+  econstructor; econstructor; split; eauto; split; auto; simpl; intros.
+  + rewrite PTree.gsspec in H1. destruct (peq x r1). 
+    clear e0. inv H1. split; auto.  
+    apply T.glb_max. auto. apply T.sub_trans with (te r2); auto.
+    eapply P; eauto.
+  + rewrite PTree.gsspec in H1. destruct (peq x r2). 
+    clear e0. inv H1. split; auto.  
+    apply T.lub_min. apply T.sub_trans with (te r1); auto. auto.
+    eapply P; eauto.
+  + rewrite ! PTree.gsspec in H1. destruct (peq x r2).
+    inv H1. split; auto. apply T.lub_min; auto. apply T.sub_trans with (te r1); auto.
+    destruct (peq x r1).
+    inv H1. split; auto. apply T.glb_max; auto. apply T.sub_trans with (te r2); auto.
+    eapply P; eauto.
+  + elim n1. apply T.sub_trans with (te r1); auto. 
+    apply T.sub_trans with (te r2); auto. 
+- econstructor; econstructor; split; eauto; split.
+  + simpl; intros. rewrite PTree.gsspec in H1. destruct (peq x r2).
+    inv H1. exploit P; eauto. intuition. 
+    apply T.sub_trans with (te r1); auto.
+    apply T.high_bound_majorant. apply T.sub_trans with (te r1); auto.
+    eapply P; eauto.
+  + destruct (T.sub_dec hi1 lo1); auto.
+- econstructor; econstructor; split; eauto; split.
+  + simpl; intros. rewrite PTree.gsspec in H1. destruct (peq x r1).
+    inv H1. exploit P; eauto. intuition. 
+    apply T.low_bound_minorant. apply T.sub_trans with (te r2); auto.
+    apply T.sub_trans with (te r2); auto.
+    eapply P; eauto.
+  + destruct (T.sub_dec hi2 lo2); auto.
+- econstructor; econstructor; split; eauto; split; auto.
+Qed.
+
+Lemma solve_rec_complete:
+  forall te q e changed,
+  satisf te e ->
+  (forall r1 r2, In (r1, r2) q -> T.sub (te r1) (te r2)) ->
+  exists e' changed', solve_rec e changed q = OK(e', changed') /\ satisf te e'.
+Proof.
+  induction q; simpl; intros.
+- econstructor; econstructor; eauto.
+- destruct a as [r1 r2]. 
+  exploit (type_move_complete te e r1 r2); auto. intros (changed1 & e1 & A & B).
+  exploit (IHq e1 (changed || changed1)); auto. intros (e' & changed' & C & D).
+  exists e'; exists changed'. rewrite A; simpl; rewrite C; auto.
+Qed.
+
+Lemma solve_constraints_complete:
+  forall te e, satisf te e -> exists e', solve_constraints e = OK e' /\ satisf te e'.
+Proof.
+  intros te e. functional induction (solve_constraints e); intros. 
+- exists e; auto.
+- exploit (solve_rec_complete te (te_sub e) {| te_typ := te_typ e; te_sub := nil |} false).
+  destruct H; split; auto. simpl; tauto.
+  destruct H; auto.
+  intros (e1 & changed1 & P & Q). 
+  apply IHr. congruence. 
+- exploit (solve_rec_complete te (te_sub e) {| te_typ := te_typ e; te_sub := nil |} false).
+  destruct H; split; auto. simpl; tauto.
+  destruct H; auto.
+  intros (e1 & changed1 & P & Q). 
+  congruence.
+Qed.
+
+Lemma solve_complete:
+  forall te e, satisf te e -> exists te', solve e = OK te'.
+Proof.
+  intros. unfold solve. 
+  destruct (solve_constraints_complete te e H) as (e' & P & Q). 
+  econstructor. rewrite P. simpl. eauto. 
+Qed.
+
+End SubSolver.
+
diff --git a/common/Values.v b/common/Values.v
index 76fb230..bb97cdc 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -61,6 +61,7 @@ Definition has_type (v: val) (t: typ) : Prop :=
   | Vint _, Tint => True
   | Vlong _, Tlong => True
   | Vfloat _, Tfloat => True
+  | Vfloat f, Tsingle => Float.is_single f
   | Vptr _ _, Tint => True
   | _, _ => False
   end.
@@ -78,6 +79,23 @@ Definition has_opttype (v: val) (ot: option typ) : Prop :=
   | Some t => has_type v t
   end.
 
+Lemma has_subtype:
+  forall ty1 ty2 v,
+  subtype ty1 ty2 = true -> has_type v ty1 -> has_type v ty2.
+Proof.
+  intros. destruct ty1; destruct ty2; simpl in H; discriminate || assumption || idtac.
+  unfold has_type in *. destruct v; auto. 
+Qed.
+
+Lemma has_subtype_list:
+  forall tyl1 tyl2 vl,
+  subtype_list tyl1 tyl2 = true -> has_type_list vl tyl1 -> has_type_list vl tyl2.
+Proof.
+  induction tyl1; intros; destruct tyl2; try discriminate; destruct vl; try contradiction.
+  red; auto.
+  simpl in *. InvBooleans. destruct H0. split; auto. eapply has_subtype; eauto.
+Qed.
+
 (** Truth values.  Pointers and non-zero integers are treated as [True].
   The integer 0 (also used to represent the null pointer) is [False].
   [Vundef] and floats are neither true nor false. *)
@@ -603,14 +621,14 @@ Definition cmplu (c: comparison) (v1 v2: val): option val :=
 
 End COMPARISONS.
 
-(** [load_result] is used in the memory model (library [Mem])
-  to post-process the results of a memory read.  For instance,
-  consider storing the integer value [0xFFF] on 1 byte at a
-  given address, and reading it back.  If it is read back with
+(** [load_result] reflects the effect of storing a value with a given
+  memory chunk, then reading it back with the same chunk.  Depending
+  on the chunk and the type of the value, some normalization occurs.
+  For instance, consider storing the integer value [0xFFF] on 1 byte
+  at a given address, and reading it back.  If it is read back with
   chunk [Mint8unsigned], zero-extension must be performed, resulting
-  in [0xFF].  If it is read back as a [Mint8signed], sign-extension
-  is performed and [0xFFFFFFFF] is returned.   Type mismatches
-  (e.g. reading back a float as a [Mint32]) read back as [Vundef]. *)
+  in [0xFF].  If it is read back as a [Mint8signed], sign-extension is
+  performed and [0xFFFFFFFF] is returned. *)
 
 Definition load_result (chunk: memory_chunk) (v: val) :=
   match chunk, v with
@@ -626,6 +644,19 @@ Definition load_result (chunk: memory_chunk) (v: val) :=
   | _, _ => Vundef
   end.
 
+Lemma load_result_type:
+  forall chunk v, has_type (load_result chunk v) (type_of_chunk chunk).
+Proof.
+  intros. destruct chunk; destruct v; simpl; auto. apply Float.singleoffloat_is_single.
+Qed.
+
+Lemma load_result_same:
+  forall v ty, has_type v ty -> load_result (chunk_of_type ty) v = v.
+Proof.
+  unfold has_type; intros. destruct v; destruct ty; try contradiction; auto.
+  simpl. rewrite Float.singleoffloat_of_single; auto. 
+Qed.
+
 (** Theorems on arithmetic operations. *)
 
 Theorem cast8unsigned_and:
diff --git a/driver/Interp.ml b/driver/Interp.ml
index 8d8d8eb..25c8b30 100644
--- a/driver/Interp.ml
+++ b/driver/Interp.ml
@@ -49,6 +49,7 @@ let print_id_ofs p (id, ofs) =
 let print_eventval p = function
   | EVint n -> fprintf p "%ld" (camlint_of_coqint n)
   | EVfloat f -> fprintf p "%F" (camlfloat_of_coqfloat f)
+  | EVfloatsingle f -> fprintf p "%F" (camlfloat_of_coqfloat f)
   | EVlong n -> fprintf p "%Ld" (camlint64_of_coqint n)
   | EVptr_global(id, ofs) -> fprintf p "&%a" print_id_ofs (id, ofs)
 
diff --git a/ia32/Asm.v b/ia32/Asm.v
index 09dead3..0609ac0 100644
--- a/ia32/Asm.v
+++ b/ia32/Asm.v
@@ -115,8 +115,6 @@ Inductive instruction: Type :=
   | Pmov_ra (rd: ireg) (id: ident)
   | Pmov_rm (rd: ireg) (a: addrmode)
   | Pmov_mr (a: addrmode) (rs: ireg)
-  | Pmovd_fr (rd: freg) (r1: ireg)      (**r [movd] (32-bit int) *)
-  | Pmovd_rf (rd: ireg) (rd: freg)
   | Pmovsd_ff (rd: freg) (r1: freg)     (**r [movsd] (single 64-bit float) *)
   | Pmovsd_fi (rd: freg) (n: float)     (**r (pseudo-instruction) *)
   | Pmovsd_fm (rd: freg) (a: addrmode)
@@ -129,16 +127,16 @@ Inductive instruction: Type :=
   (** Moves with conversion *)
   | Pmovb_mr (a: addrmode) (rs: ireg)   (**r [mov] (8-bit int) *)
   | Pmovw_mr (a: addrmode) (rs: ireg)   (**r [mov] (16-bit int) *)
-  | Pmovzb_rr (rd: ireg) (rs: ireg)    (**r [movzb] (8-bit zero-extension) *)
+  | Pmovzb_rr (rd: ireg) (rs: ireg)     (**r [movzb] (8-bit zero-extension) *)
   | Pmovzb_rm (rd: ireg) (a: addrmode)
-  | Pmovsb_rr (rd: ireg) (rs: ireg)    (**r [movsb] (8-bit sign-extension) *)
+  | Pmovsb_rr (rd: ireg) (rs: ireg)     (**r [movsb] (8-bit sign-extension) *)
   | Pmovsb_rm (rd: ireg) (a: addrmode)
-  | Pmovzw_rr (rd: ireg) (rs: ireg)    (**r [movzw] (16-bit zero-extension) *)
+  | Pmovzw_rr (rd: ireg) (rs: ireg)     (**r [movzw] (16-bit zero-extension) *)
   | Pmovzw_rm (rd: ireg) (a: addrmode)
-  | Pmovsw_rr (rd: ireg) (rs: ireg)    (**r [movsw] (16-bit sign-extension) *)
+  | Pmovsw_rr (rd: ireg) (rs: ireg)     (**r [movsw] (16-bit sign-extension) *)
   | Pmovsw_rm (rd: ireg) (a: addrmode)
   | Pcvtss2sd_fm (rd: freg) (a: addrmode) (**r [cvtss2sd] (single float load) *)
-  | Pcvtsd2ss_ff (rd: freg) (r1: freg) (**r pseudo (single conversion) *)
+  | Pcvtsd2ss_ff (rd: freg) (r1: freg)  (**r conversion to single (pseudo) *)
   | Pcvtsd2ss_mf (a: addrmode) (r1: freg) (**r [cvtsd2ss] (single float store *)
   | Pcvttsd2si_rf (rd: ireg) (r1: freg) (**r double to signed int *)
   | Pcvtsi2sd_fr (rd: freg) (r1: ireg)  (**r signed int to double *)
@@ -185,7 +183,7 @@ Inductive instruction: Type :=
   | Pjmp_s (symb: ident)
   | Pjmp_r (r: ireg)
   | Pjcc (c: testcond)(l: label)
-  | Pjcc2 (c1 c2: testcond)(l: label)
+  | Pjcc2 (c1 c2: testcond)(l: label)   (**r pseudo *)
   | Pjmptbl (r: ireg) (tbl: list label) (**r pseudo *)
   | Pcall_s (symb: ident)
   | Pcall_r (r: ireg)
@@ -481,10 +479,6 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       exec_load Mint32 m a rs rd
   | Pmov_mr a r1 =>
       exec_store Mint32 m a rs r1 nil
-  | Pmovd_fr rd r1 =>
-      Next (nextinstr (rs#rd <- (rs r1))) m
-  | Pmovd_rf rd r1 =>
-      Next (nextinstr (rs#rd <- (rs r1))) m
   | Pmovsd_ff rd r1 =>
       Next (nextinstr (rs#rd <- (rs r1))) m
   | Pmovsd_fi rd n =>
diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v
index 78f7d6e..4543ac9 100644
--- a/ia32/Asmgen.v
+++ b/ia32/Asmgen.v
@@ -103,6 +103,9 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
       | ST0  => OK (Pfld_m (Addrmode (Some base) None (inl _ ofs)) :: k)
       | _ => Error (msg "Asmgen.loadind")
       end
+  | Tsingle =>
+      do r <- freg_of dst;
+      OK (Pcvtss2sd_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
   | Tlong =>
       Error (msg "Asmgen.loadind")
   end.
@@ -118,6 +121,9 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
       | ST0  => OK (Pfstp_m (Addrmode (Some base) None (inl _ ofs)) :: k)
       | _ => Error (msg "Asmgen.loadind")
       end
+  | Tsingle =>
+      do r <- freg_of src;
+      OK (Pcvtsd2ss_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
   | Tlong =>
       Error (msg "Asmgen.storeind")
   end.
diff --git a/ia32/Asmgenproof.v b/ia32/Asmgenproof.v
index ca0fd18..f6eefbd 100644
--- a/ia32/Asmgenproof.v
+++ b/ia32/Asmgenproof.v
@@ -172,6 +172,7 @@ Proof.
   TailNoLabel.
   destruct (preg_of dst); TailNoLabel.
   discriminate.
+  TailNoLabel.
 Qed.
 
 Remark storeind_label:
@@ -183,6 +184,7 @@ Proof.
   TailNoLabel.
   destruct (preg_of src); TailNoLabel.
   discriminate.
+  TailNoLabel.
 Qed.
 
 Remark mk_setcc_base_label:
@@ -506,6 +508,8 @@ Proof.
   exists rs'; split. eauto.
   split. eapply agree_undef_regs; eauto. 
   simpl; intros. rewrite Q; auto with asmgen.
+Local Transparent destroyed_by_setstack.
+  destruct ty; simpl; intuition congruence.
 
 - (* Mgetparam *)
   assert (f0 = f) by congruence; subst f0.
diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v
index 303337e..00b706c 100644
--- a/ia32/Asmgenproof1.v
+++ b/ia32/Asmgenproof1.v
@@ -341,6 +341,12 @@ Proof.
   intuition Simplifs.
   (* long *)
   inv H.
+  (* single *)
+  monadInv H.
+  rewrite (freg_of_eq _ _ EQ). econstructor.
+  split. apply exec_straight_one. simpl. unfold exec_load. rewrite H1. rewrite H0.
+  eauto. auto.
+  intuition Simplifs.
 Qed.
 
 Lemma storeind_correct:
@@ -349,7 +355,7 @@ Lemma storeind_correct:
   Mem.storev (chunk_of_type ty) m (Val.add rs#base (Vint ofs)) (rs#(preg_of src)) = Some m' ->
   exists rs',
      exec_straight ge fn c rs m k rs' m'
-  /\ forall r, data_preg r = true -> r <> ST0 -> rs'#r = rs#r.
+  /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r.
 Proof.
   unfold storeind; intros.
   set (addr := Addrmode (Some base) None (inl (ident * int) ofs)) in *.
@@ -372,6 +378,12 @@ Proof.
   intros. simpl. Simplifs.
   (* long *)
   inv H.
+  (* single *)
+  monadInv H.
+  rewrite (freg_of_eq _ _ EQ) in H0. econstructor.
+  split. apply exec_straight_one. simpl. unfold exec_store. rewrite H1. rewrite H0.
+  simpl. eauto. auto.
+  intros. destruct H2. Simplifs.
 Qed.
 
 (** Translation of addressing modes *)
diff --git a/ia32/Machregs.v b/ia32/Machregs.v
index 31ea8ee..528e9ed 100644
--- a/ia32/Machregs.v
+++ b/ia32/Machregs.v
@@ -124,6 +124,13 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
 Definition destroyed_at_function_entry: list mreg :=
   DX :: FP0 :: nil.   (* must include destroyed_by_op Omove *)
 
+Definition destroyed_by_setstack (ty: typ): list mreg :=
+  match ty with
+  | Tfloat => FP0 :: nil
+  | Tsingle => X7 :: FP0 :: nil
+  | _ => nil
+  end.
+
 Definition temp_for_parent_frame: mreg :=
   DX.
 
@@ -164,7 +171,7 @@ Definition mregs_for_builtin (ef: external_function): list (option mreg) * list
 Global Opaque
     destroyed_by_op destroyed_by_load destroyed_by_store
     destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
-    destroyed_at_function_entry temp_for_parent_frame
+    destroyed_by_setstack destroyed_at_function_entry temp_for_parent_frame
     mregs_for_operation mregs_for_builtin.
 
 (** Two-address operations.  Return [true] if the first argument and
diff --git a/ia32/Op.v b/ia32/Op.v
index 998f34d..4ac961b 100644
--- a/ia32/Op.v
+++ b/ia32/Op.v
@@ -295,7 +295,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
   | Omove => (nil, Tint)   (* treated specially *)
   | Ointconst _ => (nil, Tint)
-  | Ofloatconst _ => (nil, Tfloat)
+  | Ofloatconst f => (nil, if Float.is_single_dec f then Tsingle else Tfloat)
   | Oindirectsymbol _ => (nil, Tint)
   | Ocast8signed => (Tint :: nil, Tint)
   | Ocast8unsigned => (Tint :: nil, Tint)
@@ -331,7 +331,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
-  | Osingleoffloat => (Tfloat :: nil, Tfloat)
+  | Osingleoffloat => (Tfloat :: nil, Tsingle)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
@@ -377,7 +377,7 @@ Proof with (try exact I).
   destruct op; simpl in H0; FuncInv; subst; simpl.
   congruence.
   exact I.
-  exact I.
+  destruct (Float.is_single_dec f); auto. 
   unfold symbol_address; destruct (Genv.find_symbol genv i)...
   destruct v0...
   destruct v0...
@@ -416,7 +416,7 @@ Proof with (try exact I).
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
-  destruct v0...
+  destruct v0... apply Float.singleoffloat_is_single.
   destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
   destruct v0; simpl in H0; inv H0...
   destruct v0; destruct v1...
@@ -425,19 +425,6 @@ Proof with (try exact I).
   destruct (eval_condition c vl m); simpl... destruct b... 
 Qed.
 
-Lemma type_of_chunk_correct:
-  forall chunk m addr v,
-  Mem.loadv chunk m addr = Some v ->
-  Val.has_type v (type_of_chunk chunk).
-Proof.
-  intro chunk.
-  assert (forall v, Val.has_type (Val.load_result chunk v) (type_of_chunk chunk)).
-  destruct v; destruct chunk; exact I.
-  intros until v. unfold Mem.loadv. 
-  destruct addr; intros; try discriminate.
-  eapply Mem.load_type; eauto.
-Qed.
-
 End SOUNDNESS.
 
 (** * Manipulating and transforming operations *)
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 5ee4d01..8820515 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -281,7 +281,7 @@ let print_annot_val oc txt args res =
   | [IR src], [IR dst] ->
       if dst <> src then fprintf oc "	movl	%a, %a\n" ireg src ireg dst
   | [FR src], [FR dst] ->
-      if dst <> src then fprintf oc "	movsd	%a, %a\n" freg src freg dst
+      if dst <> src then fprintf oc "	movapd	%a, %a\n" freg src freg dst
   | _, _ -> assert false
 
 (* Handling of memcpy *)
@@ -442,7 +442,7 @@ let print_builtin_inline oc name args res =
   | "__builtin_fabs", [FR a1], [FR res] ->
       need_masks := true;
       if a1 <> res then
-        fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
+        fprintf oc "	movapd	%a, %a\n" freg a1 freg res;
       fprintf oc "	andpd	%a, %a\n" raw_symbol "__absd_mask" freg res
   | "__builtin_fsqrt", [FR a1], [FR res] ->
       fprintf oc "	sqrtsd	%a, %a\n" freg a1 freg res
@@ -452,7 +452,7 @@ let print_builtin_inline oc name args res =
       else if res = a2 then
         fprintf oc "	maxsd	%a, %a\n" freg a1 freg res
       else begin
-        fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
+        fprintf oc "	movapd	%a, %a\n" freg a1 freg res;
         fprintf oc "	maxsd	%a, %a\n" freg a2 freg res
       end
   | "__builtin_fmin", [FR a1; FR a2], [FR res] ->
@@ -461,7 +461,7 @@ let print_builtin_inline oc name args res =
       else if res = a2 then
         fprintf oc "	minsd	%a, %a\n" freg a1 freg res
       else begin
-        fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
+        fprintf oc "	movapd	%a, %a\n" freg a1 freg res;
         fprintf oc "	minsd	%a, %a\n" freg a2 freg res
       end
   (* 64-bit integer arithmetic *)
@@ -531,10 +531,6 @@ let print_instruction oc = function
       fprintf oc "	movl	%a, %a\n" addressing a ireg rd
   | Pmov_mr(a, r1) ->
       fprintf oc "	movl	%a, %a\n" ireg r1 addressing a
-  | Pmovd_fr(rd, r1) ->
-      fprintf oc "	movd	%a, %a\n" ireg r1 freg rd
-  | Pmovd_rf(rd, r1) ->
-      fprintf oc "	movd	%a, %a\n" freg r1 ireg rd
   | Pmovsd_ff(rd, r1) ->
       fprintf oc "	movapd	%a, %a\n" freg r1 freg rd
   | Pmovsd_fi(rd, n) ->
diff --git a/ia32/standard/Conventions1.v b/ia32/standard/Conventions1.v
index cae20f6..e097e85 100644
--- a/ia32/standard/Conventions1.v
+++ b/ia32/standard/Conventions1.v
@@ -220,7 +220,7 @@ Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => AX :: nil
   | Some Tint => AX :: nil
-  | Some Tfloat => FP0 :: nil
+  | Some (Tfloat | Tsingle) => FP0 :: nil
   | Some Tlong => DX :: AX :: nil
   end.
 
@@ -261,6 +261,7 @@ Fixpoint loc_arguments_rec
   | nil => nil
   | Tint :: tys => S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 1)
   | Tfloat :: tys => S Outgoing ofs Tfloat :: loc_arguments_rec tys (ofs + 2)
+  | Tsingle :: tys => S Outgoing ofs Tsingle :: loc_arguments_rec tys (ofs + 1)
   | Tlong :: tys => S Outgoing (ofs + 1) Tint :: S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 2)
   end.
 
@@ -314,6 +315,9 @@ Proof.
   destruct H. subst l; split; [omega|congruence].
   exploit IHtyl; eauto.
   destruct l; auto. destruct sl; auto. intuition omega.
+- destruct H. subst l. split. omega. congruence.
+  exploit IHtyl; eauto.
+  destruct l; auto. destruct sl; auto. intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -362,6 +366,7 @@ Proof.
   destruct H. inv H. 
   simpl typesize. apply Zle_trans with (ofs + 2). omega. apply size_arguments_rec_above. 
   auto.
+  destruct H. inv H. apply size_arguments_rec_above. auto.
 Qed.
 
 
diff --git a/lib/Floats.v b/lib/Floats.v
index 02ff25c..3edf39c 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -319,6 +319,28 @@ Proof.
   intros; unfold singleoffloat; rewrite binary32offloatofbinary32; reflexivity.
 Qed.
 
+Definition is_single (f: float) : Prop := exists s, f = floatofbinary32 s.
+
+Theorem singleoffloat_is_single:
+  forall f, is_single (singleoffloat f).
+Proof.
+  intros. exists (binary32offloat f); auto.
+Qed.
+
+Theorem singleoffloat_of_single:
+  forall f, is_single f -> singleoffloat f = f.
+Proof.
+  intros. destruct H as [s EQ]. subst f. unfold singleoffloat. 
+  rewrite binary32offloatofbinary32; reflexivity.
+Qed.
+
+Theorem is_single_dec: forall f, {is_single f} + {~is_single f}.
+Proof.
+  intros. case (eq_dec (singleoffloat f) f); intros.
+  unfold singleoffloat in e. left. exists (binary32offloat f). auto.
+  right; red; intros; elim n. apply singleoffloat_of_single; auto.
+Defined.
+
 (** Properties of comparisons. *)
 
 Theorem order_float_finite_correct:
@@ -470,6 +492,12 @@ Proof.
   intro; unfold singleoffloat, single_of_bits; rewrite binary32offloatofbinary32; reflexivity.
 Qed.
 
+Theorem single_of_bits_is_single:
+  forall b, is_single (single_of_bits b).
+Proof.
+  intros. exists (b32_of_bits (Int.unsigned b)); auto.
+Qed.
+
 (** Conversions between floats and unsigned ints can be defined
   in terms of conversions between floats and signed ints.
   (Most processors provide only the latter, forcing the compiler
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 6a1d07e..2b6d80d 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -136,7 +136,7 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
             Plfd r (Cint ofs) base :: k
           else
             loadimm GPR0 ofs (Plfdx r base GPR0 :: k))
-  | Tlong =>
+  | Tlong | Tsingle =>
       Error (msg "Asmgen.loadind")
   end.
 
@@ -154,7 +154,7 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
             Pstfd r (Cint ofs) base :: k
           else
             loadimm GPR0 ofs (Pstfdx r base GPR0 :: k))
-  | Tlong =>
+  | Tlong | Tsingle =>
       Error (msg "Asmgen.storeind")
   end.
 
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index e723c86..2c155ea 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -526,9 +526,8 @@ Proof.
   rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
   exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
   exists rs'; split. eauto.
-  split. change (Mach.undef_regs (destroyed_by_op Omove) rs) with rs. 
-  apply agree_exten with rs0; auto with asmgen.
-  simpl; intros. rewrite Q; auto with asmgen.
+  split. eapply agree_undef_regs; eauto with asmgen.
+  simpl; intros. rewrite Q; auto with asmgen. 
 
 - (* Mgetparam *)
   assert (f0 = f) by congruence; subst f0.
diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v
index ce66e6a..d057dce 100644
--- a/powerpc/Machregs.v
+++ b/powerpc/Machregs.v
@@ -143,6 +143,12 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   | _ => nil
   end.
 
+Definition destroyed_by_setstack (ty: typ): list mreg :=
+  match ty with
+  | Tsingle => F13 :: nil
+  | _ => nil
+  end.
+
 Definition destroyed_at_function_entry: list mreg :=
   nil.
 
@@ -158,7 +164,7 @@ Definition mregs_for_builtin (ef: external_function): list (option mreg) * list
 Global Opaque
     destroyed_by_op destroyed_by_load destroyed_by_store
     destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
-    destroyed_at_function_entry temp_for_parent_frame
+    destroyed_by_setstack destroyed_at_function_entry temp_for_parent_frame
     mregs_for_operation mregs_for_builtin.
 
 (** Two-address operations.  Return [true] if the first argument and
diff --git a/powerpc/Op.v b/powerpc/Op.v
index 5835717..3963c6b 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -265,7 +265,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
   | Omove => (nil, Tint)   (* treated specially *)
   | Ointconst _ => (nil, Tint)
-  | Ofloatconst _ => (nil, Tfloat)
+  | Ofloatconst f => (nil, if Float.is_single_dec f then Tsingle else Tfloat)
   | Oaddrsymbol _ _ => (nil, Tint)
   | Oaddrstack _ => (nil, Tint)
   | Ocast8signed => (Tint :: nil, Tint)
@@ -303,7 +303,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
-  | Osingleoffloat => (Tfloat :: nil, Tfloat)
+  | Osingleoffloat => (Tfloat :: nil, Tsingle)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ofloatofwords => (Tint :: Tint :: nil, Tfloat)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
@@ -340,7 +340,7 @@ Proof with (try exact I).
   destruct op; simpl in H0; FuncInv; subst; simpl.
   congruence.
   exact I.
-  exact I.
+  destruct (Float.is_single_dec f); auto. 
   unfold symbol_address. destruct (Genv.find_symbol genv i)...
   destruct sp...
   destruct v0...
@@ -380,7 +380,7 @@ Proof with (try exact I).
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
-  destruct v0...
+  destruct v0... simpl. apply Float.singleoffloat_is_single.
   destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
@@ -389,19 +389,6 @@ Proof with (try exact I).
   destruct (eval_condition c vl m); simpl... destruct b... 
 Qed.
 
-Lemma type_of_chunk_correct:
-  forall chunk m addr v,
-  Mem.loadv chunk m addr = Some v ->
-  Val.has_type v (type_of_chunk chunk).
-Proof.
-  intro chunk.
-  assert (forall v, Val.has_type (Val.load_result chunk v) (type_of_chunk chunk)).
-  destruct v; destruct chunk; exact I.
-  intros until v. unfold Mem.loadv. 
-  destruct addr; intros; try discriminate.
-  eapply Mem.load_type; eauto.
-Qed.
-
 End SOUNDNESS.
 
 (** * Manipulating and transforming operations *)
diff --git a/powerpc/eabi/Conventions1.v b/powerpc/eabi/Conventions1.v
index 369a81a..2db1f73 100644
--- a/powerpc/eabi/Conventions1.v
+++ b/powerpc/eabi/Conventions1.v
@@ -240,7 +240,7 @@ Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => R3 :: nil
   | Some Tint => R3 :: nil
-  | Some Tfloat => F1 :: nil
+  | Some (Tfloat | Tsingle) => F1 :: nil
   | Some Tlong => R3 :: R4 :: nil
   end.
 
@@ -285,7 +285,7 @@ Fixpoint loc_arguments_rec
       | Some ireg =>
           R ireg :: loc_arguments_rec tys (ir + 1) fr ofs
       end
-  | Tfloat :: tys =>
+  | (Tfloat | Tsingle) :: tys =>
       match list_nth_z float_param_regs fr with
       | None =>
           let ofs := align ofs 2 in
@@ -321,7 +321,7 @@ Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
       | None => size_arguments_rec tys ir fr (ofs + 1)
       | Some ireg => size_arguments_rec tys (ir + 1) fr ofs
       end
-  | Tfloat :: tys =>
+  | (Tfloat | Tsingle) :: tys =>
       match list_nth_z float_param_regs fr with
       | None => size_arguments_rec tys ir fr (align ofs 2 + 2)
       | Some freg => size_arguments_rec tys ir (fr + 1) ofs
@@ -396,6 +396,14 @@ Opaque list_nth_z.
   destruct H. subst. split. omega. congruence. 
   destruct H. subst. split. omega. congruence.
   exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. 
+- (* single *)
+  destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H. 
+  subst. right. eapply list_nth_z_in; eauto.
+  eapply IHtyl; eauto.
+  subst. split. apply Zle_ge. apply align_le. omega. congruence.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto.
+  assert (ofs <= align ofs 2) by (apply align_le; omega). 
+  intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -430,6 +438,9 @@ Proof.
   apply Zle_trans with (align ofs0 2 + 2); auto; omega.
   apply Zle_trans with (align ofs0 2). apply align_le; omega.
   apply Zle_trans with (align ofs0 2 + 2); auto; omega.
+  destruct (list_nth_z float_param_regs fr); eauto.
+  apply Zle_trans with (align ofs0 2). apply align_le; omega.
+  apply Zle_trans with (align ofs0 2 + 2); auto; omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -477,7 +488,12 @@ Proof.
   transitivity (align ofs0 2 + 2). simpl; omega. eauto.  apply size_arguments_rec_above.
   destruct H0. inv H0.
   transitivity (align ofs0 2 + 2). simpl; omega. eauto.  apply size_arguments_rec_above.
-  eauto. 
-}
+  eauto.
+- (* single *)
+  destruct (list_nth_z float_param_regs fr); destruct H0. 
+  congruence.
+  eauto.
+  inv H0. apply size_arguments_rec_above. eauto. 
+  }
   eauto.
 Qed.
diff --git a/test/c/Makefile b/test/c/Makefile
index d14cb23..1444073 100644
--- a/test/c/Makefile
+++ b/test/c/Makefile
@@ -10,8 +10,8 @@ LIBS=$(LIBMATH)
 TIME=xtime -o /dev/null -mintime 2.0     # Xavier's hack
 #TIME=time >/dev/null                    # Otherwise
 
-PROGS=fib integr qsort fft fftw sha1 sha3 aes almabench lists \
-  binarytrees fannkuch knucleotide mandelbrot nbody \
+PROGS=fib integr qsort fft fftsp fftw sha1 sha3 aes almabench \
+  lists binarytrees fannkuch knucleotide mandelbrot nbody \
   nsieve nsievebits spectral vmach \
   bisect chomp perlin siphash24
 
diff --git a/test/c/Results/fftsp b/test/c/Results/fftsp
new file mode 100644
index 0000000..cbeb099
--- /dev/null
+++ b/test/c/Results/fftsp
@@ -0,0 +1 @@
+4096 points, result OK
diff --git a/test/c/fftsp.c b/test/c/fftsp.c
new file mode 100644
index 0000000..42ae905
--- /dev/null
+++ b/test/c/fftsp.c
@@ -0,0 +1,196 @@
+/*
+ C Program: Test_Fast_Fourier_Transform_in_Double_Precision (TFFTDP.c)
+ by: Dave Edelblute, edelblut@cod.nosc.mil, 05 Jan 1993
+*/
+
+/* Like fft.c, but using single-precision floats */
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifndef PI
+#define PI  3.14159265358979323846
+#endif
+
+/********************************************************/
+/*     A Duhamel-Hollman split-radix dif fft            */
+/*     Ref: Electronics Letters, Jan. 5, 1984           */
+/*     Complex input and output data in arrays x and y  */
+/*     Length is n.                                     */
+/********************************************************/
+
+int dfft(float x[], float y[], int np)
+{
+  float *px,*py;
+  int i,j,k,m,n,i0,i1,i2,i3,is,id,n1,n2,n4;
+  float  a,e,a3,cc1,ss1,cc3,ss3,r1,r2,s1,s2,s3,xt,tpi;
+
+  px = x - 1; 
+  py = y - 1;
+  i = 2; 
+  m = 1; 
+  
+  while (i < np) 
+  { 
+    i = i+i; 
+    m = m+1; 
+  }
+  
+  n = i; 
+  
+  if (n != np) {
+    for (i = np+1; i <= n; i++) {
+      px[i] = 0.0; 
+      py[i] = 0.0; 
+    }
+    /*printf("nuse %d point fft",n); */
+  }
+
+  n2 = n+n;
+  tpi = 2.0 * PI;
+  for (k = 1;  k <= m-1; k++ ) {
+    n2 = n2 / 2; 
+    n4 = n2 / 4; 
+    e  = tpi / (double)n2; 
+    a  = 0.0;
+    
+    for (j = 1; j<= n4 ; j++) {
+      a3 = 3.0 * a; 
+      cc1 = cosf(a); 
+      ss1 = sinf(a);
+      
+      cc3 = cosf(a3); 
+      ss3 = sinf(a3); 
+      a = e * (double)j; 
+      is = j; 
+      id = 2 * n2;
+	  
+      while ( is < n ) {
+        for (i0 = is; i0 <= n-1; i0 = i0 + id) {
+          i1 = i0 + n4; 
+          i2 = i1 + n4; 
+          i3 = i2 + n4;
+          r1 = px[i0] - px[i2];
+          px[i0] = px[i0] + px[i2];
+          r2 = px[i1] - px[i3];
+          px[i1] = px[i1] + px[i3];
+          s1 = py[i0] - py[i2];
+          py[i0] = py[i0] + py[i2];
+          s2 = py[i1] - py[i3];
+          py[i1] = py[i1] + py[i3];
+          s3 = r1 - s2; r1 = r1 + s2; 
+          s2 = r2 - s1; r2 = r2 + s1;
+          px[i2] = r1*cc1 - s2*ss1; 
+          py[i2] = -s2*cc1 - r1*ss1;
+          px[i3] = s3*cc3 + r2*ss3;
+          py[i3] = r2*cc3 - s3*ss3;
+        }
+        is = 2 * id - n2 + j; 
+        id = 4 * id;
+      }
+    }
+  }
+  
+/************************************/
+/*  Last stage, length=2 butterfly  */
+/************************************/
+  is = 1; 
+  id = 4;
+  
+  while ( is < n) {
+    for (i0 = is; i0 <= n; i0 = i0 + id) {
+      i1 = i0 + 1; 
+      r1 = px[i0];
+      px[i0] = r1 + px[i1];
+      px[i1] = r1 - px[i1];
+      r1 = py[i0];
+      py[i0] = r1 + py[i1];
+      py[i1] = r1 - py[i1];
+    }
+    is = 2*id - 1; 
+    id = 4 * id; 
+  }
+  
+/*************************/
+/*  Bit reverse counter  */
+/*************************/
+  j = 1; 
+  n1 = n - 1;
+  
+  for (i = 1; i <= n1; i++) {
+    if (i < j) {
+      xt = px[j];
+      px[j] = px[i]; 
+      px[i] = xt;
+      xt = py[j]; 
+      py[j] = py[i];
+      py[i] = xt;
+    }
+    
+    k = n / 2; 
+    while (k < j) {
+      j = j - k; 
+      k = k / 2; 
+    }
+    j = j + k;
+  }
+
+/*
+  for (i = 1; i<=16; i++) printf("%d  %g   %gn",i,px[i],py[i]);
+*/
+  
+  return(n);
+}
+
+/* Test harness */
+
+float * xr, * xi;
+
+int main(int argc, char ** argv)
+{
+  int n, np, npm, n2, i, j;
+  float enp, t, y, z, zr, zi, zm, a;
+  float * pxr, * pxi;
+
+  if (argc >= 2) n = atoi(argv[1]); else n = 12;
+  np = 1 << n;
+  enp = np; 
+  npm = np / 2  - 1;  
+  t = PI / enp;
+  xr = calloc(np, sizeof(float));
+  xi = calloc(np, sizeof(float));
+  pxr = xr;
+  pxi = xi;
+  *pxr = (enp - 1.0) * 0.5;
+  *pxi = 0.0;
+  n2 = np / 2;  
+  *(pxr+n2) = -0.5;
+  *(pxi+n2) =  0.0;
+  for (i = 1; i <= npm; i++) {
+    j = np - i;
+    *(pxr+i) = -0.5;
+    *(pxr+j) = -0.5;
+    z = t * (double)i;  
+    y = -0.5*(cosf(z)/sinf(z));
+    *(pxi+i) =  y;
+    *(pxi+j) = -y;
+  }
+  dfft(xr,xi,np);
+  zr = 0.0; 
+  zi = 0.0; 
+  npm = np-1;
+  for (i = 0; i <= npm; i++ ) {
+    a = fabsf(pxr[i] - i);
+    if (zr < a) zr = a;
+    a = fabsf(pxi[i]);
+    if (zi < a) zi = a;
+  }
+  zm = zr;
+  if (zr < zi) zm = zi;
+  if (zm < 1e-3)
+    printf("%d points, result OK\n", np);
+  else
+    printf("%d points, WRONG result %e\n", np, zm);
+  return 0;
+}
-- 
cgit v1.2.3