Merge of "newspilling" branch:

- Support single-precision floats as first-class values - Introduce chunks Many32, Many64 and types Tany32, Tany64 to support saving and restoring registers without knowing the exact types (int/single/float) of their contents, just their sizes. - Memory model: generalize the opaque encoding of pointers to apply to any value, not just pointers, if chunks Many32/Many64 are selected. - More properties of FP arithmetic proved. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2537 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
author: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-07-23 08:54:56 +0000
committer: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-07-23 08:54:56 +0000
commit: 2a0168fea37b68ad14e2cb60bf215111e49d4870 (patch)
tree: 2f59373790d8ce3a5df66ef7a692271cf0666c6c /arm
parent: 00805153cf9b88aa07cc6694b17d93f5ba2e7de8 (diff)
16 files changed, 864 insertions, 236 deletions
diff --git a/arm/Archi.v b/arm/Archi.v
index e693541..5657f31 100644
--- a/arm/Archi.v
+++ b/arm/Archi.v
@@ -25,12 +25,24 @@ Definition big_endian := false.
 Notation align_int64 := 8%Z (only parsing).
 Notation align_float64 := 8%Z (only parsing).
 
-Program Definition default_pl : bool * nan_pl 53 := (false, nat_iter 51 xO xH).
+Program Definition default_pl_64 : bool * nan_pl 53 :=
+  (false, nat_iter 51 xO xH).
 
-Definition choose_binop_pl (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+Definition choose_binop_pl_64 (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
   (** Choose second NaN if pl2 is sNaN but pl1 is qNan.
       In all other cases, choose first NaN *)
   (Pos.testbit (proj1_sig pl1) 51 &&
    negb (Pos.testbit (proj1_sig pl2) 51))%bool.
 
-Global Opaque big_endian default_pl choose_binop_pl.
+Program Definition default_pl_32 : bool * nan_pl 24 :=
+  (false, nat_iter 22 xO xH).
+
+Definition choose_binop_pl_32 (s1: bool) (pl1: nan_pl 24) (s2: bool) (pl2: nan_pl 24) :=
+  (** Choose second NaN if pl2 is sNaN but pl1 is qNan.
+      In all other cases, choose first NaN *)
+  (Pos.testbit (proj1_sig pl1) 22 &&
+   negb (Pos.testbit (proj1_sig pl2) 22))%bool.
+
+Global Opaque big_endian
+              default_pl_64 choose_binop_pl_64
+              default_pl_32 choose_binop_pl_32.
diff --git a/arm/Asm.v b/arm/Asm.v
index 9d3ba5b..f054db0 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -143,6 +143,7 @@ Inductive instruction : Type :=
   | Pcmp: ireg -> shift_op -> instruction         (**r integer comparison *)
   | Peor: ireg -> ireg -> shift_op -> instruction (**r bitwise exclusive or *)
   | Pldr: ireg -> ireg -> shift_addr -> instruction (**r int32 load *)
+  | Pldr_a: ireg -> ireg -> shift_addr -> instruction (**r any32 load to int register *)
   | Pldrb: ireg -> ireg -> shift_addr -> instruction (**r unsigned int8 load *)
   | Pldrh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *)
   | Pldrsb: ireg -> ireg -> shift_addr -> instruction (**r signed int8 load *)
@@ -155,6 +156,7 @@ Inductive instruction : Type :=
   | Porr: ireg -> ireg -> shift_op -> instruction  (**r bitwise or *)
   | Prsb: ireg -> ireg -> shift_op -> instruction  (**r integer reverse subtraction *)
   | Pstr: ireg -> ireg -> shift_addr -> instruction (**r int32 store *)
+  | Pstr_a: ireg -> ireg -> shift_addr -> instruction (**r any32 store from int register *)
   | Pstrb: ireg -> ireg -> shift_addr -> instruction (**r int8 store *)
   | Pstrh: ireg -> ireg -> shift_addr -> instruction (**r int16 store *)
   | Psdiv: instruction                              (**r signed division *)
@@ -172,15 +174,31 @@ Inductive instruction : Type :=
   | Pfsubd: freg -> freg -> freg -> instruction     (**r float subtraction *)
   | Pflid: freg -> float -> instruction             (**r load float constant *)
   | Pfcmpd: freg -> freg -> instruction             (**r float comparison *)
-  | Pfcmpzd: freg -> instruction             (**r float comparison with 0.0 *)
+  | Pfcmpzd: freg -> instruction                    (**r float comparison with 0.0 *)
   | Pfsitod: freg -> ireg -> instruction            (**r signed int to float *)
   | Pfuitod: freg -> ireg -> instruction            (**r unsigned int to float *)
   | Pftosizd: ireg -> freg -> instruction           (**r float to signed int *)
   | Pftouizd: ireg -> freg -> instruction           (**r float to unsigned int *)
-  | Pfcvtsd: freg -> freg -> instruction            (**r round to singled precision *)
+  | Pfabss: freg -> freg -> instruction             (**r float absolute value *)
+  | Pfnegs: freg -> freg -> instruction             (**r float opposite *)
+  | Pfadds: freg -> freg -> freg -> instruction     (**r float addition *)
+  | Pfdivs: freg -> freg -> freg -> instruction     (**r float division *)
+  | Pfmuls: freg -> freg -> freg -> instruction     (**r float multiplication *)
+  | Pfsubs: freg -> freg -> freg -> instruction     (**r float subtraction *)
+  | Pflis: freg -> float32 -> instruction           (**r load float constant *)
+  | Pfcmps: freg -> freg -> instruction             (**r float comparison *)
+  | Pfcmpzs: freg -> instruction                    (**r float comparison with 0.0 *)
+  | Pfsitos: freg -> ireg -> instruction            (**r signed int to float *)
+  | Pfuitos: freg -> ireg -> instruction            (**r unsigned int to float *)
+  | Pftosizs: ireg -> freg -> instruction           (**r float to signed int *)
+  | Pftouizs: ireg -> freg -> instruction           (**r float to unsigned int *)
+  | Pfcvtsd: freg -> freg -> instruction            (**r round to single precision *)
+  | Pfcvtds: freg -> freg -> instruction            (**r expand to double precision *)
   | Pfldd: freg -> ireg -> int -> instruction       (**r float64 load *)
+  | Pfldd_a: freg -> ireg -> int -> instruction     (**r any64 load to FP reg *)
   | Pflds: freg -> ireg -> int -> instruction       (**r float32 load *)
   | Pfstd: freg -> ireg -> int -> instruction       (**r float64 store *)
+  | Pfstd_a: freg -> ireg -> int -> instruction     (**r any64 store from FP reg *)
   | Pfsts: freg -> ireg -> int -> instruction       (**r float32 store *)
 
   (* Pseudo-instructions *)
@@ -392,7 +410,7 @@ Definition compare_int (rs: regset) (v1 v2: val) (m: mem) :=
     #CC <- (Val.cmpu (Mem.valid_pointer m) Cge v1 v2)
     #CV <- (Val.sub_overflow v1 v2).
 
-(** Semantics of [fcmpd] instruction:
+(** Semantics of [fcmp] instructions:
 <<
 ==	N=0 Z=1 C=1 V=0
 <	N=1 Z=0 C=0 V=0
@@ -415,6 +433,20 @@ Definition compare_float (rs: regset) (v1 v2: val) :=
         #CV <- Vundef
   end.
 
+Definition compare_float32 (rs: regset) (v1 v2: val) :=
+  match v1, v2 with
+  | Vsingle f1, Vsingle f2 =>
+      rs#CN <- (Val.of_bool (Float32.cmp Clt f1 f2))
+        #CZ <- (Val.of_bool (Float32.cmp Ceq f1 f2))
+        #CC <- (Val.of_bool (negb (Float32.cmp Clt f1 f2)))
+        #CV <- (Val.of_bool (negb (Float32.cmp Ceq f1 f2 || Float32.cmp Clt f1 f2 || Float32.cmp Cgt f1 f2)))
+  | _, _ =>
+      rs#CN <- Vundef
+        #CZ <- Vundef
+        #CC <- Vundef
+        #CV <- Vundef
+  end.
+
 (** Testing a condition *)
 
 Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
@@ -523,6 +555,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#r1 <- (Val.xor rs#r2 (eval_shift_op so rs)))) m
   | Pldr r1 r2 sa => 
       exec_load Mint32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+  | Pldr_a r1 r2 sa => 
+      exec_load Many32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pldrb r1 r2 sa => 
       exec_load Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pldrh r1 r2 sa => 
@@ -551,6 +585,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#r1 <- (Val.sub (eval_shift_op so rs) rs#r2))) m
   | Pstr r1 r2 sa => 
       exec_store Mint32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+  | Pstr_a r1 r2 sa => 
+      exec_store Many32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pstrb r1 r2 sa => 
       exec_store Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pstrh r1 r2 sa => 
@@ -606,19 +642,48 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intoffloat rs#r2)))) m
   | Pftouizd r1 r2 =>
       Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intuoffloat rs#r2)))) m
+  | Pfabss r1 r2 => 
+      Next (nextinstr (rs#r1 <- (Val.absfs rs#r2))) m
+  | Pfnegs r1 r2 => 
+      Next (nextinstr (rs#r1 <- (Val.negfs rs#r2))) m
+  | Pfadds r1 r2 r3 =>     
+      Next (nextinstr (rs#r1 <- (Val.addfs rs#r2 rs#r3))) m
+  | Pfdivs r1 r2 r3 =>     
+      Next (nextinstr (rs#r1 <- (Val.divfs rs#r2 rs#r3))) m
+  | Pfmuls r1 r2 r3 =>     
+      Next (nextinstr (rs#r1 <- (Val.mulfs rs#r2 rs#r3))) m
+  | Pfsubs r1 r2 r3 =>     
+      Next (nextinstr (rs#r1 <- (Val.subfs rs#r2 rs#r3))) m
+  | Pflis r1 f =>            
+      Next (nextinstr (rs#r1 <- (Vsingle f))) m
+  | Pfcmps r1 r2 =>              
+      Next (nextinstr (compare_float32 rs rs#r1 rs#r2)) m
+  | Pfcmpzs r1 =>              
+      Next (nextinstr (compare_float32 rs rs#r1 (Vsingle Float32.zero))) m
+  | Pfsitos r1 r2 =>
+      Next (nextinstr (rs#r1 <- (Val.maketotal (Val.singleofint rs#r2)))) m
+  | Pfuitos r1 r2 =>
+      Next (nextinstr (rs#r1 <- (Val.maketotal (Val.singleofintu rs#r2)))) m
+  | Pftosizs r1 r2 =>
+      Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intofsingle rs#r2)))) m
+  | Pftouizs r1 r2 =>
+      Next (nextinstr (rs #FR6 <- Vundef #r1 <- (Val.maketotal (Val.intuofsingle rs#r2)))) m
   | Pfcvtsd r1 r2 =>
       Next (nextinstr (rs#r1 <- (Val.singleoffloat rs#r2))) m
+  | Pfcvtds r1 r2 =>
+      Next (nextinstr (rs#r1 <- (Val.floatofsingle rs#r2))) m
   | Pfldd r1 r2 n =>
       exec_load Mfloat64 (Val.add rs#r2 (Vint n)) r1 rs m
+  | Pfldd_a r1 r2 n =>
+      exec_load Many64 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pflds r1 r2 n =>
       exec_load Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pfstd r1 r2 n =>      
       exec_store Mfloat64 (Val.add rs#r2 (Vint n)) r1 rs m
+  | Pfstd_a r1 r2 n =>      
+      exec_store Many64 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pfsts r1 r2 n =>
-      match exec_store Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m with
-      | Next rs' m' => Next (rs'#FR6 <- Vundef) m'
-      | Stuck => Stuck
-      end
+      exec_store Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m
   (* Pseudo-instructions *)
   | Pallocframe sz pos =>             
       let (m1, stk) := Mem.alloc m 0 sz in
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index 2513a5e..fa4faa6 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -185,6 +185,18 @@ Definition transl_cond
   | Cnotcompfzero cmp, a1 :: nil =>
       do r1 <- freg_of a1;
       OK (Pfcmpzd r1 :: k)
+  | Ccompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmps r1 r2 :: k)
+  | Cnotcompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfcmps r1 r2 :: k)
+  | Ccompfszero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmpzs r1 :: k)
+  | Cnotcompfszero cmp, a1 :: nil =>
+      do r1 <- freg_of a1;
+      OK (Pfcmpzs r1 :: k)
   | _, _ =>
       Error(msg "Asmgen.transl_cond")
   end.
@@ -241,6 +253,10 @@ Definition cond_for_cond (cond: condition) :=
   | Cnotcompf cmp => cond_for_float_not_cmp cmp
   | Ccompfzero cmp => cond_for_float_cmp cmp
   | Cnotcompfzero cmp => cond_for_float_not_cmp cmp
+  | Ccompfs cmp => cond_for_float_cmp cmp
+  | Cnotcompfs cmp => cond_for_float_not_cmp cmp
+  | Ccompfszero cmp => cond_for_float_cmp cmp
+  | Cnotcompfszero cmp => cond_for_float_not_cmp cmp
   end.
 
 (** Translation of the arithmetic operation [r <- op(args)].
@@ -261,6 +277,9 @@ Definition transl_op
   | Ofloatconst f, nil =>
       do r <- freg_of res;
       OK (Pflid r f :: k)
+  | Osingleconst f, nil =>
+      do r <- freg_of res;
+      OK (Pflis r f :: k)
   | Oaddrsymbol s ofs, nil =>
       do r <- ireg_of res;
       OK (Ploadsymbol r s ofs :: k)
@@ -400,9 +419,30 @@ Definition transl_op
   | Odivf, a1 :: a2 :: nil =>
       do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
       OK (Pfdivd r r1 r2 :: k)
+  | Onegfs, a1 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1;
+      OK (Pfnegs r r1 :: k)
+  | Oabsfs, a1 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1;
+      OK (Pfabss r r1 :: k)
+  | Oaddfs, a1 :: a2 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfadds r r1 r2 :: k)
+  | Osubfs, a1 :: a2 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfsubs r r1 r2 :: k)
+  | Omulfs, a1 :: a2 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfmuls r r1 r2 :: k)
+  | Odivfs, a1 :: a2 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; do r2 <- freg_of a2;
+      OK (Pfdivs r r1 r2 :: k)
   | Osingleoffloat, a1 :: nil =>
       do r <- freg_of res; do r1 <- freg_of a1;
       OK (Pfcvtsd r r1 :: k)
+  | Ofloatofsingle, a1 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1;
+      OK (Pfcvtds r r1 :: k)
   | Ointoffloat, a1 :: nil =>
       do r <- ireg_of res; do r1 <- freg_of a1;
       OK (Pftosizd r r1 :: k)
@@ -415,6 +455,18 @@ Definition transl_op
   | Ofloatofintu, a1 :: nil =>
       do r <- freg_of res; do r1 <- ireg_of a1;
       OK (Pfuitod r r1 :: k)
+  | Ointofsingle, a1 :: nil =>
+      do r <- ireg_of res; do r1 <- freg_of a1;
+      OK (Pftosizs r r1 :: k)
+  | Ointuofsingle, a1 :: nil =>
+      do r <- ireg_of res; do r1 <- freg_of a1;
+      OK (Pftouizs r r1 :: k)
+  | Osingleofint, a1 :: nil =>
+      do r <- freg_of res; do r1 <- ireg_of a1;
+      OK (Pfsitos r r1 :: k)
+  | Osingleofintu, a1 :: nil =>
+      do r <- freg_of res; do r1 <- ireg_of a1;
+      OK (Pfuitos r r1 :: k)
   | Ocmp cmp, _ =>
       do r <- ireg_of res;
       transl_cond cmp args
@@ -440,31 +492,34 @@ Definition loadind_int (base: ireg) (ofs: int) (dst: ireg) (k: code) :=
   indexed_memory_access (fun base n => Pldr dst base (SAimm n)) mk_immed_mem_word base ofs k.
 
 Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of dst; OK (loadind_int base ofs r k)
-  | Tfloat =>
-      do r <- freg_of dst;
-      OK (indexed_memory_access (Pfldd r) mk_immed_mem_float base ofs k)
-  | Tsingle =>
-      do r <- freg_of dst;
+  match ty, preg_of dst with
+  | Tint, IR r =>
+      OK (indexed_memory_access (fun base n => Pldr r base (SAimm n)) mk_immed_mem_word base ofs k)
+  | Tany32, IR r =>
+      OK (indexed_memory_access (fun base n => Pldr_a r base (SAimm n)) mk_immed_mem_word base ofs k)
+  | Tsingle, FR r =>
       OK (indexed_memory_access (Pflds r) mk_immed_mem_float base ofs k)
-  | Tlong =>
+  | Tfloat, FR r =>
+      OK (indexed_memory_access (Pfldd r) mk_immed_mem_float base ofs k)
+  | Tany64, FR r =>
+      OK (indexed_memory_access (Pfldd_a r) mk_immed_mem_float base ofs k)
+  | _, _ =>
       Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of src;
+  match ty, preg_of src with
+  | Tint, IR r =>
       OK (indexed_memory_access (fun base n => Pstr r base (SAimm n)) mk_immed_mem_word base ofs k)
-  | Tfloat =>
-      do r <- freg_of src;
-      OK (indexed_memory_access (Pfstd r) mk_immed_mem_float base ofs k)
-  | Tsingle =>
-      do r <- freg_of src;
+  | Tany32, IR r =>
+      OK (indexed_memory_access (fun base n => Pstr_a r base (SAimm n)) mk_immed_mem_word base ofs k)
+  | Tsingle, FR r =>
       OK (indexed_memory_access (Pfsts r) mk_immed_mem_float base ofs k)
-  | Tlong =>
+  | Tfloat, FR r =>
+      OK (indexed_memory_access (Pfstd r) mk_immed_mem_float base ofs k)
+  | Tany64, FR r =>
+      OK (indexed_memory_access (Pfstd_a r) mk_immed_mem_float base ofs k)
+  | _, _ =>
       Error (msg "Asmgen.storeind")
   end.
 
@@ -546,7 +601,7 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
       transl_memory_access_float Pflds mk_immed_mem_float dst addr args k
   | Mfloat64 =>
       transl_memory_access_float Pfldd mk_immed_mem_float dst addr args k
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_load")
   end.
 
@@ -567,7 +622,7 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing)
       transl_memory_access_float Pfsts mk_immed_mem_float src addr args k
   | Mfloat64 =>
       transl_memory_access_float Pfstd mk_immed_mem_float src addr args k
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_store")
   end.
 
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index cfe4f54..341f6a0 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -237,16 +237,15 @@ Hint Resolve indexed_memory_access_label.
 Remark loadind_label:
   forall base ofs ty dst k c, loadind base ofs ty dst k = OK c -> tail_nolabel k c.
 Proof.
-  intros. destruct ty; monadInv H.
-  unfold loadind_int; TailNoLabel.
-  TailNoLabel.
-  TailNoLabel.
+  unfold loadind, loadind_int; intros;
+  destruct ty, (preg_of dst); inv H; TailNoLabel.
 Qed.
 
 Remark storeind_label:
   forall base ofs ty src k c, storeind src base ofs ty k = OK c -> tail_nolabel k c.
 Proof.
-  intros. destruct ty; monadInv H; TailNoLabel.
+  unfold storeind; intros;
+  destruct ty, (preg_of src); inv H; TailNoLabel.
 Qed.
 
 Remark transl_cond_label:
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index 3e00217..a0d6752 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -405,22 +405,6 @@ Proof.
   split. Simpl. intros; Simpl.
 Qed.
 
-(*
-Lemma loadind_float_correct:
-  forall (base: ireg) ofs dst (rs: regset) m v k,
-  Mem.loadv Mfloat64al32 m (Val.add rs#base (Vint ofs)) = Some v ->
-  exists rs',
-     exec_straight ge fn (loadind_float base ofs dst k) rs m k rs' m
-  /\ rs'#dst = v
-  /\ forall r, r <> PC -> r <> IR14 -> r <> dst -> rs'#r = rs#r.
-Proof.
-  intros; unfold loadind_float. apply indexed_memory_access_correct; intros.
-  econstructor; split. 
-  apply exec_straight_one. simpl. unfold exec_load. rewrite H0; rewrite H; eauto. auto.
-  split. Simpl. intros; Simpl.
-Qed.
-*)
-
 Lemma loadind_correct:
   forall (base: ireg) ofs ty dst k c (rs: regset) m v,
   loadind base ofs ty dst k = OK c ->
@@ -430,56 +414,32 @@ Lemma loadind_correct:
   /\ rs'#(preg_of dst) = v
   /\ forall r, r <> PC -> r <> IR14 -> r <> preg_of dst -> rs'#r = rs#r.
 Proof.
-  unfold loadind; intros.
-  destruct ty; monadInv H.
+  unfold loadind; intros. destruct ty; destruct (preg_of dst); inv H; simpl in H0.
 - (* int *)
-  erewrite ireg_of_eq by eauto. apply loadind_int_correct; auto. 
+  apply loadind_int_correct; auto. 
 - (* float *)
-  erewrite freg_of_eq by eauto. simpl in H0.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
   split. Simpl. intros; Simpl.
 - (* single *)
-  erewrite freg_of_eq by eauto. simpl in H0.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
   split. Simpl. intros; Simpl.
-Qed.
-
-(** Indexed memory stores. *)
-
-(*
-Lemma storeind_int_correct:
-  forall (base: ireg) ofs (src: ireg) (rs: regset) m m' k,
-  Mem.storev Mint32 m (Val.add rs#base (Vint ofs)) (rs#src) = Some m' ->
-  src <> IR14 ->
-  exists rs',
-     exec_straight ge fn (storeind_int src base ofs k) rs m k rs' m'
-  /\ forall r, r <> PC -> r <> IR14 -> rs'#r = rs#r.
-Proof.
-  intros; unfold storeind_int. apply indexed_memory_access_correct; intros.
+- (* any32 *)
+  apply indexed_memory_access_correct; intros.
   econstructor; split. 
-  apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H1. rewrite H2; auto with asmgen. rewrite H; eauto. auto.
-  intros; Simpl.
-Qed.
-
-Lemma storeind_float_correct:
-  forall (base: ireg) ofs (src: freg) (rs: regset) m m' k,
-  Mem.storev Mfloat64al32 m (Val.add rs#base (Vint ofs)) (rs#src) = Some m' ->
-  exists rs',
-     exec_straight ge fn (storeind_float src base ofs k) rs m k rs' m'
-  /\ forall r, r <> PC -> r <> IR14 -> rs'#r = rs#r.
-Proof.
-  intros; unfold storeind_float. apply indexed_memory_access_correct; intros.
+  apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
+  split. Simpl. intros; Simpl.
+- (* any64 *)
+  apply indexed_memory_access_correct; intros.
   econstructor; split. 
-  apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H0. rewrite H1; auto with asmgen. rewrite H; eauto. auto.
-  intros; Simpl.
+  apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. 
+  split. Simpl. intros; Simpl.
 Qed.
-*)
+
+(** Indexed memory stores. *)
 
 Lemma storeind_correct:
   forall (base: ireg) ofs ty src k c (rs: regset) m m',
@@ -490,29 +450,38 @@ Lemma storeind_correct:
   /\ forall r, r <> PC -> r <> IR14 -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r.
 Proof.
   unfold storeind; intros.
-  destruct ty; monadInv H; simpl in H0.
+  assert (NOT14: preg_of src <> IR IR14) by eauto with asmgen.
+  destruct ty; destruct (preg_of src); inv H; simpl in H0.
 - (* int *)
-  erewrite ireg_of_eq  in H0 by eauto.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
-  assert (IR x <> IR IR14) by eauto with asmgen. congruence.
-  auto. intros; Simpl.
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
 - (* float *)
-  erewrite freg_of_eq  in H0 by eauto.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
-  auto. intros; Simpl.
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
 - (* single *)
-  erewrite freg_of_eq  in H0 by eauto.
   apply indexed_memory_access_correct; intros.
   econstructor; split. 
   apply exec_straight_one. simpl. unfold exec_store. 
-  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto.
-  auto. intros; Simpl.
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
+- (* any32 *)
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_store. 
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
+- (* any64 *)
+  apply indexed_memory_access_correct; intros.
+  econstructor; split. 
+  apply exec_straight_one. simpl. unfold exec_store. 
+  rewrite H. rewrite H1; auto with asmgen. rewrite H0; eauto. auto.
+  intros; Simpl.
 Qed.
 
 (** Translation of shift immediates *)
@@ -775,6 +744,112 @@ Proof.
   exfalso; eapply Float.cmp_lt_gt_false; eauto.
 Qed.
 
+Lemma compare_float32_spec:
+  forall rs f1 f2,
+  let rs1 := nextinstr (compare_float32 rs (Vsingle f1) (Vsingle f2)) in
+     rs1#CN = Val.of_bool (Float32.cmp Clt f1 f2)
+  /\ rs1#CZ = Val.of_bool (Float32.cmp Ceq f1 f2)
+  /\ rs1#CC = Val.of_bool (negb (Float32.cmp Clt f1 f2))
+  /\ rs1#CV = Val.of_bool (negb (Float32.cmp Ceq f1 f2 || Float32.cmp Clt f1 f2 || Float32.cmp Cgt f1 f2)).
+Proof.
+  intros. intuition.
+Qed.
+
+Lemma compare_float32_inv:
+  forall rs v1 v2,
+  let rs1 := nextinstr (compare_float32 rs v1 v2) in
+  forall r', data_preg r' = true -> rs1#r' = rs#r'.
+Proof.
+  intros. unfold rs1, compare_float32.
+  assert (nextinstr (rs#CN <- Vundef #CZ <- Vundef #CC <- Vundef #CV <- Vundef) r' = rs r').
+  { repeat Simplif. }
+  destruct v1; destruct v2; auto.
+  repeat Simplif. 
+Qed.
+
+Lemma compare_float32_nextpc:
+  forall rs v1 v2,
+  nextinstr (compare_float32 rs v1 v2) PC = Val.add (rs PC) Vone.
+Proof.
+  intros. unfold compare_float32. destruct v1; destruct v2; reflexivity.
+Qed.
+
+Lemma cond_for_float32_cmp_correct:
+  forall c n1 n2 rs,
+  eval_testcond (cond_for_float_cmp c)
+                (nextinstr (compare_float32 rs (Vsingle n1) (Vsingle n2))) =
+  Some(Float32.cmp c n1 n2).
+Proof.
+  intros.
+  generalize (compare_float32_spec rs n1 n2). 
+  set (rs' := nextinstr (compare_float32 rs (Vsingle n1) (Vsingle n2))).
+  intros [A [B [C D]]].
+  unfold eval_testcond. rewrite A; rewrite B; rewrite C; rewrite D.
+  destruct c; simpl.
+(* eq *)
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* ne *)
+  rewrite Float32.cmp_ne_eq. destruct (Float32.cmp Ceq n1 n2); auto.
+(* lt *)
+  destruct (Float32.cmp Clt n1 n2); auto.
+(* le *)
+  rewrite Float32.cmp_le_lt_eq. 
+  destruct (Float32.cmp Clt n1 n2); destruct (Float32.cmp Ceq n1 n2); auto.
+(* gt *)
+  destruct (Float32.cmp Ceq n1 n2) eqn:EQ;
+  destruct (Float32.cmp Clt n1 n2) eqn:LT;
+  destruct (Float32.cmp Cgt n1 n2) eqn:GT; auto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+  exfalso; eapply Float32.cmp_gt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+(* ge *)
+  rewrite Float32.cmp_ge_gt_eq. 
+  destruct (Float32.cmp Ceq n1 n2) eqn:EQ;
+  destruct (Float32.cmp Clt n1 n2) eqn:LT;
+  destruct (Float32.cmp Cgt n1 n2) eqn:GT; auto.
+  exfalso; eapply Float32.cmp_lt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+Qed.
+
+Lemma cond_for_float32_not_cmp_correct:
+  forall c n1 n2 rs,
+  eval_testcond (cond_for_float_not_cmp c)
+                (nextinstr (compare_float32 rs (Vsingle n1) (Vsingle n2)))=
+  Some(negb(Float32.cmp c n1 n2)).
+Proof.
+  intros.
+  generalize (compare_float32_spec rs n1 n2).
+  set (rs' := nextinstr (compare_float32 rs (Vsingle n1) (Vsingle n2))).
+  intros [A [B [C D]]].
+  unfold eval_testcond. rewrite A; rewrite B; rewrite C; rewrite D.
+  destruct c; simpl.
+(* eq *)
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* ne *)
+  rewrite Float32.cmp_ne_eq. destruct (Float32.cmp Ceq n1 n2); auto.
+(* lt *)
+  destruct (Float32.cmp Clt n1 n2); auto.
+(* le *)
+  rewrite Float32.cmp_le_lt_eq. 
+  destruct (Float32.cmp Clt n1 n2) eqn:LT; destruct (Float32.cmp Ceq n1 n2) eqn:EQ; auto.
+(* gt *)
+  destruct (Float32.cmp Ceq n1 n2) eqn:EQ;
+  destruct (Float32.cmp Clt n1 n2) eqn:LT;
+  destruct (Float32.cmp Cgt n1 n2) eqn:GT; auto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+  exfalso; eapply Float32.cmp_gt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+(* ge *)
+  rewrite Float32.cmp_ge_gt_eq. 
+  destruct (Float32.cmp Ceq n1 n2) eqn:EQ;
+  destruct (Float32.cmp Clt n1 n2) eqn:LT;
+  destruct (Float32.cmp Cgt n1 n2) eqn:GT; auto.
+  exfalso; eapply Float32.cmp_lt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_eq_false; eauto.
+  exfalso; eapply Float32.cmp_lt_gt_false; eauto.
+Qed.
+
 Ltac ArgsInv :=
   repeat (match goal with
   | [ H: Error _ = OK _ |- _ ] => discriminate
@@ -881,6 +956,37 @@ Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
 Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
   exact I.
   apply compare_float_inv.
+- (* Ccompfs *)
+  econstructor.
+  split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
+  split. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
+  destruct (rs x); try discriminate. destruct (rs x0); try discriminate.
+  simpl in CMP. inv CMP. apply cond_for_float32_cmp_correct.
+  apply compare_float32_inv.
+- (* Cnotcompfs *)
+  econstructor.
+  split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
+  split. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
+  destruct (rs x); try discriminate. destruct (rs x0); try discriminate.
+  simpl in CMP. inv CMP.
+Local Opaque compare_float32. simpl. apply cond_for_float32_not_cmp_correct.
+  exact I.
+  apply compare_float32_inv.
+- (* Ccompfszero *)
+  econstructor.
+  split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
+  split. destruct (Val.cmpfs_bool c0 (rs x) (Vsingle Float32.zero)) eqn:CMP; auto.
+  destruct (rs x); try discriminate.
+  simpl in CMP. inv CMP. apply cond_for_float32_cmp_correct.
+  apply compare_float32_inv.
+- (* Cnotcompfzero *)
+  econstructor.
+  split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
+  split. destruct (Val.cmpfs_bool c0 (rs x) (Vsingle Float32.zero)) eqn:CMP; auto.
+  destruct (rs x); try discriminate. simpl in CMP. inv CMP.
+  simpl. apply cond_for_float32_not_cmp_correct.
+  exact I.
+  apply compare_float32_inv.
 Qed.
 
 (** Translation of arithmetic operations. *)
@@ -1039,6 +1145,18 @@ Transparent destroyed_by_op.
   (* floatofintu *)
   econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
   intuition Simpl.
+  (* intofsingle *)
+  econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
+  simpl. intuition Simpl.
+  (* intuofsingle *)
+  econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
+  simpl. intuition Simpl.
+  (* singleofint *)
+  econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
+  intuition Simpl.
+  (* singleofintu *)
+  econstructor; split. apply exec_straight_one; simpl. rewrite H0; simpl. eauto. auto.
+  intuition Simpl.
   (* Ocmp *)
   contradiction.
 Qed.
@@ -1234,6 +1352,8 @@ Proof.
   discriminate.
   eapply transl_load_float_correct; eauto.
   eapply transl_load_float_correct; eauto.
+  discriminate.
+  discriminate.
 Qed.
 
 Lemma transl_store_correct:
@@ -1256,13 +1376,10 @@ Proof.
 - eapply transl_store_int_correct; eauto.
 - eapply transl_store_int_correct; eauto.
 - discriminate.
-- unfold transl_memory_access_float in H. monadInv H. rewrite (freg_of_eq _ _ EQ) in *. 
-  eapply transl_memory_access_correct; eauto. 
-  intros. econstructor; split. apply exec_straight_one.
-  simpl. unfold exec_store. rewrite H. rewrite H2; eauto with asmgen. 
-  rewrite H1. eauto. auto. intros. Simpl.
-  simpl; auto.
 - eapply transl_store_float_correct; eauto.
+- eapply transl_store_float_correct; eauto.
+- discriminate.
+- discriminate.
 Qed.
 
 End CONSTRUCTORS.
diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp
index 2b658a4..4f4bf5a 100644
--- a/arm/ConstpropOp.vp
+++ b/arm/ConstpropOp.vp
@@ -70,6 +70,22 @@ Nondetfunction cond_strength_reduction
       if Float.eq_dec n2 Float.zero
       then (Cnotcompfzero c, r1 :: nil)
       else (cond, args)
+  | Ccompfs c, r1 :: r2 :: nil, FS n1 :: v2 :: nil =>
+      if Float32.eq_dec n1 Float32.zero
+      then (Ccompfszero (swap_comparison c), r2 :: nil)
+      else (cond, args)
+  | Ccompfs c, r1 :: r2 :: nil, v1 :: FS n2 :: nil =>
+      if Float32.eq_dec n2 Float32.zero
+      then (Ccompfszero c, r1 :: nil)
+      else (cond, args)
+  | Cnotcompfs c, r1 :: r2 :: nil, FS n1 :: v2 :: nil =>
+      if Float32.eq_dec n1 Float32.zero
+      then (Cnotcompfszero (swap_comparison c), r2 :: nil)
+      else (cond, args)
+  | Cnotcompfs c, r1 :: r2 :: nil, v1 :: FS n2 :: nil =>
+      if Float32.eq_dec n2 Float32.zero
+      then (Cnotcompfszero c, r1 :: nil)
+      else (cond, args)
   | _, _, _ => 
       (cond, args)
   end.
@@ -164,18 +180,19 @@ Definition make_xorimm (n: int) (r: reg) :=
   else (Oxorimm n, r :: nil).
 
 Definition make_mulfimm (n: float) (r r1 r2: reg) :=
-  if Float.eq_dec n (Float.floatofint (Int.repr 2))
+  if Float.eq_dec n (Float.of_int (Int.repr 2))
   then (Oaddf, r :: r :: nil)
   else (Omulf, r1 :: r2 :: nil).
 
+Definition make_mulfsimm (n: float32) (r r1 r2: reg) :=
+  if Float32.eq_dec n (Float32.of_int (Int.repr 2))
+  then (Oaddfs, r :: r :: nil)
+  else (Omulfs, r1 :: r2 :: nil).
+
 Definition make_cast8signed (r: reg) (a: aval) :=
   if vincl a (Sgn 8) then (Omove, r :: nil) else (Ocast8signed, r :: nil).
 Definition make_cast16signed (r: reg) (a: aval) :=
   if vincl a (Sgn 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
-Definition make_singleoffloat (r: reg) (a: aval) :=
-  if vincl a Fsingle && generate_float_constants tt
-  then (Omove, r :: nil)
-  else (Osingleoffloat, r :: nil).
 
 Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list aval) :=
@@ -207,10 +224,11 @@ Nondetfunction op_strength_reduction
   | Oshl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shlimm n2 r1 r2
   | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1 r2
   | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
-  | Osingleoffloat, r1 :: nil, v1 :: nil => make_singleoffloat r1 v1
   | Ocmp c, args, vl => make_cmp c args vl
   | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
   | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
+  | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
+  | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
 
diff --git a/arm/ConstpropOpproof.v b/arm/ConstpropOpproof.v
index 00ea8bc..597c960 100644
--- a/arm/ConstpropOpproof.v
+++ b/arm/ConstpropOpproof.v
@@ -80,6 +80,10 @@ Ltac SimplVM :=
       let E := fresh in
       assert (E: v = Vfloat n) by (inversion H; auto);
       rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (FS ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vsingle n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
   | [ H: vmatch _ ?v (Ptr(Gl ?id ?ofs)) |- _ ] =>
       let E := fresh in
       assert (E: Val.lessdef v (Genv.symbol_address ge id ofs)) by (eapply vmatch_ptr_gl; eauto); 
@@ -123,6 +127,18 @@ Proof.
 - destruct (Float.eq_dec n2 Float.zero); simpl; auto.
   subst n2; auto.
   rewrite H1; auto. 
+- destruct (Float32.eq_dec n1 Float32.zero).
+  subst n1. simpl. destruct (rs#r2); simpl; auto. rewrite Float32.cmp_swap. auto.
+  simpl. rewrite H1; auto. 
+- destruct (Float32.eq_dec n2 Float32.zero).
+  subst n2. simpl. auto.
+  simpl. rewrite H1; auto.
+- destruct (Float32.eq_dec n1 Float32.zero).
+  subst n1. simpl. destruct (rs#r2); simpl; auto. rewrite Float32.cmp_swap. auto.
+  simpl. rewrite H1; auto. 
+- destruct (Float32.eq_dec n2 Float32.zero); simpl; auto.
+  subst n2; auto.
+  rewrite H1; auto. 
 - auto.
 Qed.
 
@@ -335,7 +351,7 @@ Lemma make_mulfimm_correct:
   exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (rs#r1); simpl; auto. rewrite Float.mul2_add; auto. 
   simpl. econstructor; split; eauto. 
@@ -348,13 +364,40 @@ Lemma make_mulfimm_correct_2:
   exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (rs#r2); simpl; auto. rewrite Float.mul2_add; auto. 
   rewrite Float.mul_commut; auto. 
   simpl. econstructor; split; eauto. 
 Qed.
 
+Lemma make_mulfsimm_correct:
+  forall n r1 r2,
+  rs#r2 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r1 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulfs rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r1); simpl; auto. rewrite Float32.mul2_add; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
+Lemma make_mulfsimm_correct_2:
+  forall n r1 r2,
+  rs#r1 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r2 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.mulfs rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r2); simpl; auto. rewrite Float32.mul2_add; auto. 
+  rewrite Float32.mul_commut; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
 Lemma make_cast8signed_correct:
   forall r x,
   vmatch bc rs#r x ->
@@ -383,21 +426,6 @@ Proof.
   econstructor; split; simpl; eauto.
 Qed.
 
-Lemma make_singleoffloat_correct:
-  forall r x,
-  vmatch bc rs#r x ->
-  let (op, args) := make_singleoffloat r x in
-  exists v, eval_operation ge (Vptr sp Int.zero) op rs##args m = Some v /\ Val.lessdef (Val.singleoffloat rs#r) v.
-Proof.
-  intros; unfold make_singleoffloat. 
-  destruct (vincl x Fsingle && generate_float_constants tt) eqn:INCL. 
-  InvBooleans. exists rs#r; split; auto. 
-  assert (V: vmatch bc rs#r Fsingle).
-  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite Float.singleoffloat_of_single by auto. auto.
-  econstructor; split; simpl; eauto.
-Qed.
-
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = map (fun r => AE.get r ae) args ->
@@ -459,14 +487,16 @@ Proof.
   InvApproxRegs; SimplVM. inv H0. apply make_shrimm_correct; auto.
 (* shru *)
   InvApproxRegs; SimplVM. inv H0. apply make_shruimm_correct; auto.
-(* singleoffloat *)
-  InvApproxRegs; SimplVM; inv H0. apply make_singleoffloat_correct; auto.
 (* cmp *)
   inv H0. apply make_cmp_correct; auto.
 (* mulf *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) rs#r2).
   rewrite <- H2. apply make_mulfimm_correct_2; auto.
+(* mulfs *)
+  InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto.
+  InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) rs#r2).
+  rewrite <- H2. apply make_mulfsimm_correct_2; auto.
 (* default *)
   exists v; auto.
 Qed.
diff --git a/arm/Machregs.v b/arm/Machregs.v
index b55259b..791ccbb 100644
--- a/arm/Machregs.v
+++ b/arm/Machregs.v
@@ -44,14 +44,10 @@ Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
-  | R0 => Tint  | R1 => Tint  | R2 => Tint  | R3 => Tint
-  | R4 => Tint  | R5 => Tint  | R6 => Tint  | R7 => Tint
-  | R8 => Tint  | R9 => Tint  | R10 => Tint | R11 => Tint
-  | R12 => Tint
-  | F0 => Tfloat | F1 => Tfloat | F2 => Tfloat | F3 => Tfloat
-  | F4 => Tfloat| F5 => Tfloat  | F6 => Tfloat | F7 => Tfloat
-  | F8 => Tfloat | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat
-  | F12 => Tfloat | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat
+  | R0  | R1  | R2  | R3   | R4  | R5  | R6  | R7 
+  | R8  | R9  | R10 | R11  | R12 => Tany32
+  | F0  | F1  | F2  | F3   | F4 | F5   | F6  | F7 
+  | F8  | F9  | F10  | F11 | F12  | F13  | F14  | F15 => Tany64
   end.
 
 Open Scope positive_scope.
@@ -84,18 +80,14 @@ Definition is_stack_reg (r: mreg) : bool := false.
 Definition destroyed_by_op (op: operation): list mreg :=
   match op with
   | Odiv | Odivu => R0 :: R1 :: R2 :: R3 :: R12 :: nil
-  | Ointoffloat | Ointuoffloat => F6 :: nil
+  | Ointoffloat | Ointuoffloat | Ointofsingle | Ointuofsingle => F6 :: nil
   | _ => nil
   end.
 
 Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
   nil.
 
-Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
-  match chunk with
-  | Mfloat32 => F6 :: nil
-  | _ => nil
-  end.
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := nil.
 
 Definition destroyed_by_cond (cond: condition): list mreg :=
   nil.
@@ -106,16 +98,10 @@ Definition destroyed_by_jumptable: list mreg :=
 Definition destroyed_by_builtin (ef: external_function): list mreg :=
   match ef with
   | EF_memcpy sz al => if zle sz 32 then nil else R2 :: R3 :: R12 :: nil
-  | EF_vstore Mfloat32 => F6 :: nil
-  | EF_vstore_global Mfloat32 _ _ => F6 :: nil
   | _ => nil
   end.
 
-Definition destroyed_by_setstack (ty: typ): list mreg :=
-  match ty with
-  | Tsingle => F6 :: nil
-  | _ => nil
-  end.
+Definition destroyed_by_setstack (ty: typ): list mreg := nil.
 
 Definition destroyed_at_function_entry: list mreg :=
   R12 :: nil.
diff --git a/arm/NeedOp.v b/arm/NeedOp.v
index 3fb0d72..e91ea64 100644
--- a/arm/NeedOp.v
+++ b/arm/NeedOp.v
@@ -42,6 +42,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Omove => nv::nil
   | Ointconst n => nil
   | Ofloatconst n => nil
+  | Osingleconst n => nil
   | Oaddrsymbol id ofs => nil
   | Oaddrstack ofs => nil
   | Ocast8signed => op1 (sign_ext 8 nv)
@@ -74,8 +75,11 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Oshrximm _ => op1 (default nv)
   | Onegf | Oabsf => op1 (default nv)
   | Oaddf | Osubf | Omulf | Odivf => op2 (default nv)
-  | Osingleoffloat => op1 (singleoffloat nv)
+  | Onegfs | Oabsfs => op1 (default nv)
+  | Oaddfs | Osubfs | Omulfs | Odivfs => op2 (default nv)
+  | Ofloatofsingle | Osingleoffloat => op1 (default nv)
   | Ointoffloat | Ointuoffloat | Ofloatofint | Ofloatofintu => op1 (default nv)
+  | Ointofsingle | Ointuofsingle | Osingleofint | Osingleofintu => op1 (default nv)
   | Omakelong => op2 (default nv)
   | Olowlong | Ohighlong => op1 (default nv)
   | Ocmp c => needs_of_condition c
@@ -87,7 +91,6 @@ Definition operation_is_redundant (op: operation) (nv: nval): bool :=
   | Ocast16signed => sign_ext_redundant 16 nv
   | Oandimm n => andimm_redundant nv n
   | Oorimm n => orimm_redundant nv n
-  | Osingleoffloat => singleoffloat_redundant nv
   | _ => false
   end.
 
@@ -180,7 +183,6 @@ Proof.
 - apply notint_sound; auto. 
 - apply notint_sound. apply needs_of_shift_sound; auto.
 - apply needs_of_shift_sound; auto. 
-- apply singleoffloat_sound; auto. 
 Qed.
 
 Lemma operation_is_redundant_sound:
@@ -195,7 +197,6 @@ Proof.
 - apply sign_ext_redundant_sound; auto. omega.
 - apply andimm_redundant_sound; auto. 
 - apply orimm_redundant_sound; auto.
-- apply singleoffloat_redundant_sound; auto.
 Qed.
 
 End SOUNDNESS.
diff --git a/arm/Op.v b/arm/Op.v
index b50a7b0..e7971f0 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -57,11 +57,14 @@ Inductive condition : Type :=
   | Ccompushift: comparison -> shift -> condition (**r unsigned integer comparison *)
   | Ccompimm: comparison -> int -> condition (**r signed integer comparison with a constant *)
   | Ccompuimm: comparison -> int -> condition (**r unsigned integer comparison with a constant *)
-  | Ccompf: comparison -> condition     (**r floating-point comparison *)
+  | Ccompf: comparison -> condition     (**r 64-bit floating-point comparison *)
   | Cnotcompf: comparison -> condition (**r negation of a floating-point comparison *)
   | Ccompfzero: comparison -> condition     (**r floating-point comparison with 0.0 *)
-  | Cnotcompfzero: comparison -> condition. (**r negation of a floating-point comparison with 0.0 *)
-
+  | Cnotcompfzero: comparison -> condition  (**r negation of a floating-point comparison with 0.0 *)
+  | Ccompfs: comparison -> condition    (**r 32-bit floating-point comparison *)
+  | Cnotcompfs: comparison -> condition (**r negation of a floating-point comparison *)
+  | Ccompfszero: comparison -> condition     (**r floating-point comparison with 0.0 *)
+  | Cnotcompfszero: comparison -> condition.  (**r negation of a floating-point comparison with 0.0 *)
 
 (** Arithmetic and logical operations.  In the descriptions, [rd] is the
   result of the operation and [r1], [r2], etc, are the arguments. *)
@@ -69,7 +72,8 @@ Inductive condition : Type :=
 Inductive operation : Type :=
   | Omove: operation                    (**r [rd = r1] *)
   | Ointconst: int -> operation         (**r [rd] is set to the given integer constant *)
-  | Ofloatconst: float -> operation     (**r [rd] is set to the given float constant *)
+  | Ofloatconst: float -> operation     (**r [rd] is set to the given 64-bit float constant *)
+  | Osingleconst: float32 -> operation  (**r [rd] is set to the given 32-bit float constant *)
   | Oaddrsymbol: ident -> int -> operation (**r [rd] is set to the the address of the symbol plus the offset *)
   | Oaddrstack: int -> operation        (**r [rd] is set to the stack pointer plus the given offset *)
 (*c Integer arithmetic: *)
@@ -113,12 +117,23 @@ Inductive operation : Type :=
   | Osubf: operation                    (**r [rd = r1 - r2] *)
   | Omulf: operation                    (**r [rd = r1 * r2] *)
   | Odivf: operation                    (**r [rd = r1 / r2] *)
+  | Onegfs: operation                   (**r [rd = - r1] *)
+  | Oabsfs: operation                   (**r [rd = abs(r1)] *)
+  | Oaddfs: operation                   (**r [rd = r1 + r2] *)
+  | Osubfs: operation                   (**r [rd = r1 - r2] *)
+  | Omulfs: operation                   (**r [rd = r1 * r2] *)
+  | Odivfs: operation                   (**r [rd = r1 / r2] *)
   | Osingleoffloat: operation           (**r [rd] is [r1] truncated to single-precision float *)
+  | Ofloatofsingle: operation           (**r [rd] is [r1] expanded to double-precision float *)
 (*c Conversions between int and float: *)
   | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
   | Ointuoffloat: operation             (**r [rd = unsigned_int_of_float(r1)] *)
   | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] *)
   | Ofloatofintu: operation             (**r [rd = float_of_unsigned_int(r1)] *)
+  | Ointofsingle: operation              (**r [rd = signed_int_of_single(r1)] *)
+  | Ointuofsingle: operation             (**r [rd = unsigned_int_of_single(r1)] *)
+  | Osingleofint: operation              (**r [rd = single_of_signed_int(r1)] *)
+  | Osingleofintu: operation             (**r [rd = single_of_unsigned_int(r1)] *)
 (*c Manipulating 64-bit integers: *)
   | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
   | Olowlong: operation                 (**r [rd = low-word(r1)] *)
@@ -160,6 +175,7 @@ Definition eq_operation (x y: operation): {x=y} + {x<>y}.
 Proof.
   generalize Int.eq_dec; intro.
   generalize Float.eq_dec; intro.
+  generalize Float32.eq_dec; intro.
   assert (forall (x y: ident), {x=y}+{x<>y}). exact peq.
   generalize eq_shift; intro.
   generalize eq_condition; intro.
@@ -203,6 +219,10 @@ Definition eval_condition (cond: condition) (vl: list val) (m: mem):
   | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
   | Ccompfzero c, v1 :: nil => Val.cmpf_bool c v1 (Vfloat Float.zero)
   | Cnotcompfzero c, v1 :: nil => option_map negb (Val.cmpf_bool c v1 (Vfloat Float.zero))
+  | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2)
+  | Ccompfszero c, v1 :: nil => Val.cmpfs_bool c v1 (Vsingle Float32.zero)
+  | Cnotcompfszero c, v1 :: nil => option_map negb (Val.cmpfs_bool c v1 (Vsingle Float32.zero))
   | _, _ => None
   end.
 
@@ -213,6 +233,7 @@ Definition eval_operation
   | Omove, v1::nil => Some v1
   | Ointconst n, nil => Some (Vint n)
   | Ofloatconst n, nil => Some (Vfloat n)
+  | Osingleconst n, nil => Some (Vsingle n)
   | Oaddrsymbol s ofs, nil => Some (Genv.symbol_address genv s ofs)
   | Oaddrstack ofs, nil => Some (Val.add sp (Vint ofs))
   | Ocast8signed, v1::nil => Some (Val.sign_ext 8 v1)
@@ -254,11 +275,22 @@ Definition eval_operation
   | Osubf, v1::v2::nil => Some(Val.subf v1 v2)
   | Omulf, v1::v2::nil => Some(Val.mulf v1 v2)
   | Odivf, v1::v2::nil => Some(Val.divf v1 v2)
+  | Onegfs, v1::nil => Some(Val.negfs v1)
+  | Oabsfs, v1::nil => Some(Val.absfs v1)
+  | Oaddfs, v1::v2::nil => Some(Val.addfs v1 v2)
+  | Osubfs, v1::v2::nil => Some(Val.subfs v1 v2)
+  | Omulfs, v1::v2::nil => Some(Val.mulfs v1 v2)
+  | Odivfs, v1::v2::nil => Some(Val.divfs v1 v2)
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
+  | Ofloatofsingle, v1::nil => Some(Val.floatofsingle v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
   | Ointuoffloat, v1::nil => Val.intuoffloat v1
   | Ofloatofint, v1::nil => Val.floatofint v1
   | Ofloatofintu, v1::nil => Val.floatofintu v1
+  | Ointofsingle, v1::nil => Val.intofsingle v1
+  | Ointuofsingle, v1::nil => Val.intuofsingle v1
+  | Osingleofint, v1::nil => Val.singleofint v1
+  | Osingleofintu, v1::nil => Val.singleofintu v1
   | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
   | Olowlong, v1::nil => Some(Val.loword v1)
   | Ohighlong, v1::nil => Some(Val.hiword v1)
@@ -281,8 +313,6 @@ Ltac FuncInv :=
   match goal with
   | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
       destruct x; simpl in H; try discriminate; FuncInv
-  | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
-      destruct v; simpl in H; try discriminate; FuncInv
   | H: (Some _ = Some _) |- _ =>
       injection H; intros; clear H; FuncInv
   | _ =>
@@ -303,13 +333,18 @@ Definition type_of_condition (c: condition) : list typ :=
   | Cnotcompf _ => Tfloat :: Tfloat :: nil
   | Ccompfzero _ => Tfloat :: nil
   | Cnotcompfzero _ => Tfloat :: nil
+  | Ccompfs _ => Tsingle :: Tsingle :: nil
+  | Cnotcompfs _ => Tsingle :: Tsingle :: nil
+  | Ccompfszero _ => Tsingle :: nil
+  | Cnotcompfszero _ => Tsingle :: nil
   end.
 
 Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
   | Omove => (nil, Tint)   (* treated specially *)
   | Ointconst _ => (nil, Tint)
-  | Ofloatconst f => (nil, if Float.is_single_dec f then Tsingle else Tfloat)
+  | Ofloatconst f => (nil, Tfloat)
+  | Osingleconst f => (nil, Tsingle)
   | Oaddrsymbol _ _ => (nil, Tint)
   | Oaddrstack _ => (nil, Tint)
   | Ocast8signed => (Tint :: nil, Tint)
@@ -351,11 +386,22 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
+  | Onegfs => (Tsingle :: nil, Tsingle)
+  | Oabsfs => (Tsingle :: nil, Tsingle)
+  | Oaddfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Osubfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Omulfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Odivfs => (Tsingle :: Tsingle :: nil, Tsingle)
   | Osingleoffloat => (Tfloat :: nil, Tsingle)
+  | Ofloatofsingle => (Tsingle :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ointuoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
   | Ofloatofintu => (Tint :: nil, Tfloat)
+  | Ointofsingle => (Tsingle :: nil, Tint)
+  | Ointuofsingle => (Tsingle :: nil, Tint)
+  | Osingleofint => (Tint :: nil, Tsingle)
+  | Osingleofintu => (Tint :: nil, Tsingle)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
   | Olowlong => (Tlong :: nil, Tint)
   | Ohighlong => (Tlong :: nil, Tint)
@@ -390,7 +436,6 @@ Proof with (try exact I).
   intros.
   destruct op; simpl; simpl in H0; FuncInv; try subst v...
   congruence.
-  destruct (Float.is_single_dec f); red; auto. 
   unfold Genv.symbol_address. destruct (Genv.find_symbol genv i)...
   destruct sp...
   destruct v0...
@@ -433,9 +478,20 @@ Proof with (try exact I).
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
-  destruct v0... simpl. apply Float.singleoffloat_is_single.  
-  destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); simpl in H2; inv H2...
-  destruct v0; simpl in H0; inv H0. destruct (Float.intuoffloat f); simpl in H2; inv H2...
+  destruct v0...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
+  destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); simpl in H2; inv H2...
+  destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); simpl in H2; inv H2...
+  destruct v0; simpl in H0; inv H0...
+  destruct v0; simpl in H0; inv H0...
+  destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); simpl in H2; inv H2...
+  destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); simpl in H2; inv H2...
   destruct v0; simpl in H0; inv H0...
   destruct v0; simpl in H0; inv H0...
   destruct v0; destruct v1...
@@ -503,6 +559,10 @@ Definition negate_condition (cond: condition): condition :=
   | Cnotcompf c => Ccompf c
   | Ccompfzero c => Cnotcompfzero c
   | Cnotcompfzero c => Ccompfzero c
+  | Ccompfs c => Cnotcompfs c
+  | Cnotcompfs c => Ccompfs c
+  | Ccompfszero c => Cnotcompfszero c
+  | Cnotcompfszero c => Ccompfszero c
   end.
 
 Lemma eval_negate_condition:
@@ -520,6 +580,10 @@ Proof.
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0); auto. destruct b; auto.
   repeat (destruct vl; auto). 
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v (Vfloat Float.zero)); auto. destruct b; auto.
+  repeat (destruct vl; auto). 
+  repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0); auto. destruct b; auto.
+  repeat (destruct vl; auto). 
+  repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v (Vsingle Float32.zero)); auto. destruct b; auto.
 Qed.
 
 (** Shifting stack-relative references.  This is used in [Stacking]. *)
@@ -772,6 +836,8 @@ Ltac InvInject :=
       inv H; InvInject
   | [ H: val_inject _ (Vfloat _) _ |- _ ] =>
       inv H; InvInject
+  | [ H: val_inject _ (Vsingle _) _ |- _ ] =>
+      inv H; InvInject
   | [ H: val_inject _ (Vptr _ _) _ |- _ ] =>
       inv H; InvInject
   | [ H: val_list_inject _ nil _ |- _ ] =>
@@ -804,6 +870,10 @@ Proof.
   inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; simpl in H0; inv H0; auto.
   inv H3; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; simpl in H0; inv H0; auto.
+  inv H3; simpl in H0; inv H0; auto.
 Qed.
 
 Ltac TrivialExists :=
@@ -869,11 +939,27 @@ Proof.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
+
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+
   inv H4; simpl; auto.
+  inv H4; simpl; auto.
+
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
+  exists (Vint i); auto.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2.
+  exists (Vint i); auto.
+  inv H4; simpl in *; inv H1. TrivialExists.
+  inv H4; simpl in *; inv H1. TrivialExists.
 
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
   exists (Vint i); auto.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.intuoffloat f0); simpl in H2; inv H2.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2.
   exists (Vint i); auto.
   inv H4; simpl in *; inv H1. TrivialExists.
   inv H4; simpl in *; inv H1. TrivialExists.
diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml
index 7e6827e..2ad9114 100644
--- a/arm/PrintAsm.ml
+++ b/arm/PrintAsm.ml
@@ -161,9 +161,10 @@ let distance_to_emit_constants () =
 (* Associate labels to floating-point constants and to symbols *)
 
 let float_labels = (Hashtbl.create 39 : (int64, int) Hashtbl.t)
+let float32_labels = (Hashtbl.create 39 : (int32, int) Hashtbl.t)
 
 let label_float f =
-  let bf = camlint64_of_coqint(Floats.Float.bits_of_double f) in
+  let bf = camlint64_of_coqint(Floats.Float.to_bits f) in
   try
     Hashtbl.find float_labels bf
   with Not_found ->
@@ -173,6 +174,17 @@ let label_float f =
     max_pos_constants := min !max_pos_constants (!currpos + 1024);
     lbl'
 
+let label_float32 f =
+  let bf = camlint_of_coqint(Floats.Float32.to_bits f) in
+  try
+    Hashtbl.find float32_labels bf
+  with Not_found ->
+    let lbl' = new_label() in
+    Hashtbl.add float32_labels bf lbl';
+    size_constants := !size_constants + 4;
+    max_pos_constants := min !max_pos_constants (!currpos + 1024);
+    lbl'
+
 let symbol_labels =
   (Hashtbl.create 39 : (ident * Integers.Int.int, int) Hashtbl.t)
 
@@ -188,6 +200,7 @@ let label_symbol id ofs =
 
 let reset_constants () =
   Hashtbl.clear float_labels;
+  Hashtbl.clear float32_labels;
   Hashtbl.clear symbol_labels;
   size_constants := 0;
   max_pos_constants := max_int
@@ -201,6 +214,10 @@ let emit_constants oc =
       fprintf oc ".L%d:	.word	0x%Lx, 0x%Lx\n" lbl bflo bfhi)
     float_labels;
   Hashtbl.iter
+    (fun bf lbl ->
+      fprintf oc ".L%d:	.word	0x%lx\n" lbl bf)
+    float32_labels;
+  Hashtbl.iter
     (fun (id, ofs) lbl ->
       fprintf oc ".L%d:	.word	%a\n"
           lbl print_symb_ofs (id, ofs))
@@ -348,8 +365,7 @@ let print_builtin_vload_common oc chunk args res =
         fprintf oc "	ldr	%a, [%a, #0]\n" ireg res2 ireg addr
       end; 2
   | Mfloat32, [IR addr], [FR res] ->
-      fprintf oc "	flds	%a, [%a, #0]\n" freg_single res ireg addr;
-      fprintf oc "	fcvtds	%a, %a\n" freg res freg_single res; 2
+      fprintf oc "	flds	%a, [%a, #0]\n" freg_single res ireg addr; 1
   | Mfloat64, [IR addr], [FR res] ->
       fprintf oc "	fldd	%a, [%a, #0]\n" freg res ireg addr; 1
   | _ ->
@@ -379,8 +395,7 @@ let print_builtin_vstore_common oc chunk args =
       fprintf oc "	str	%a, [%a, #0]\n" ireg src2 ireg addr;
       fprintf oc "	str	%a, [%a, #4]\n" ireg src1 ireg addr; 2
   | Mfloat32, [IR  addr; FR src] ->
-      fprintf oc "	fcvtsd	%a, %a\n" freg_single FR6 freg src;
-      fprintf oc "	fsts	%a, [%a, #0]\n" freg_single FR6 ireg addr; 2
+      fprintf oc "	fsts	%a, [%a, #0]\n" freg_single src ireg addr; 1
   | Mfloat64, [IR  addr; FR src] ->
       fprintf oc "	fstd	%a, [%a, #0]\n" freg src ireg addr; 1
   | _ ->
@@ -409,11 +424,11 @@ let align n a = (n + a - 1) land (-a)
 let rec next_arg_location ir ofs = function
   | [] ->
       Int32.of_int (ir * 4 + ofs)
-  | (Tint | Tsingle) :: l ->
+  | (Tint | Tsingle | Tany32) :: l ->
       if ir < 4
       then next_arg_location (ir + 1) ofs l
       else next_arg_location ir (ofs + 4) l
-  | (Tfloat | Tlong) :: l ->
+  | (Tfloat | Tlong | Tany64) :: l ->
       if ir < 3
       then next_arg_location (align ir 2 + 2) ofs l
       else next_arg_location ir (align ofs 8 + 8) l
@@ -525,11 +540,9 @@ module FixupEABI = struct
 
   let fixup_single oc dir f i =
     match dir with
-    | Incoming ->     (* f <- i; f <- double_of_single f *)
-        fprintf oc "	fmsr	%a, %a\n" freg_single f ireg i;
-        fprintf oc "	fcvtds	%a, %a\n" freg f freg_single f
-    | Outgoing ->     (* f <- single_of_double f; i <- f *)
-        fprintf oc "	fcvtsd	%a, %a\n" freg_single f freg f;
+    | Incoming ->     (* f <- i *)
+        fprintf oc "	fmsr	%a, %a\n" freg_single f ireg i
+    | Outgoing ->     (* i <- f *)
         fprintf oc "	fmrs	%a, %a\n" ireg i freg_single f
 
   let fixup_conventions oc dir tyl =
@@ -537,11 +550,11 @@ module FixupEABI = struct
       if i >= 4 then 0 else
         match tyl with
         | [] -> 0
-        | Tint :: tyl' ->
+        | (Tint | Tany32) :: tyl' ->
             fixup (i+1) tyl'
         | Tlong :: tyl' ->
             fixup (((i + 1) land (-2)) + 2) tyl'
-        | Tfloat :: tyl' ->
+        | (Tfloat | Tany64) :: tyl' ->
             let i = (i + 1) land (-2) in
             if i >= 4 then 0 else begin
               fixup_double oc dir (freg_param i) (ireg_param i) (ireg_param (i+1));
@@ -549,7 +562,7 @@ module FixupEABI = struct
             end
         | Tsingle :: tyl' ->
             fixup_single oc dir (freg_param i) (ireg_param i);
-            2 + fixup (i+1) tyl'
+            1 + fixup (i+1) tyl'
     in fixup 0 tyl
 
   let fixup_arguments oc dir sg =
@@ -577,8 +590,8 @@ module FixupHF = struct
   let rec fixup_actions used fr tyl =
     match tyl with
     | [] -> []
-    | (Tint | Tlong) :: tyl' -> fixup_actions used fr tyl'
-    | Tfloat :: tyl' ->
+    | (Tint | Tlong | Tany32) :: tyl' -> fixup_actions used fr tyl'
+    | (Tfloat | Tany64) :: tyl' ->
         if fr >= 8 then [] else begin
           let dr = find_double used 0 in
           assert (dr < 8);
@@ -599,7 +612,7 @@ module FixupHF = struct
         1 + fixup_outgoing oc act
       end
   | (fr, Single, sr) :: act ->
-        fprintf oc "	fcvtsd	s%d, d%d\n" sr fr;
+        fprintf oc "	fcpys	s%d, s%d\n" sr (2*fr);
         1 + fixup_outgoing oc act
 
   let rec fixup_incoming oc = function
@@ -612,8 +625,10 @@ module FixupHF = struct
       end
   | (fr, Single, sr) :: act ->
       let n = fixup_incoming oc act in
-      fprintf oc "	fcvtds	d%d, s%d\n" fr sr;
+      if fr = sr then n else begin
+        fprintf oc "	fcpys	s%d, s%d\n" (2*fr) sr;
         1 + n
+      end
 
   let fixup_arguments oc dir sg =
     if sg.sig_cc.cc_vararg then
@@ -628,14 +643,8 @@ module FixupHF = struct
   let fixup_result oc dir sg =
     if sg.sig_cc.cc_vararg then
       FixupEABI.fixup_result oc dir sg
-    else begin
-      match proj_sig_res sg, dir with
-      | Tsingle, Outgoing ->
-          fprintf oc "	fcvtsd	s0, d0\n"; 1
-      | Tsingle, Incoming ->
-          fprintf oc "	fcvtds	d0, s0\n"; 1
-      | _ -> 0
-    end
+    else
+      0
 end
 
 let (fixup_arguments, fixup_result) =
@@ -703,7 +712,7 @@ let print_instruction oc = function
       fprintf oc "	cmp	%a, %a\n" ireg r1 shift_op so; 1
   | Peor(r1, r2, so) ->
       fprintf oc "	eor	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
-  | Pldr(r1, r2, sa) ->
+  | Pldr(r1, r2, sa) | Pldr_a(r1, r2, sa) ->
       fprintf oc "	ldr	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
   | Pldrb(r1, r2, sa) ->
       fprintf oc "	ldrb	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
@@ -727,7 +736,7 @@ let print_instruction oc = function
       fprintf oc "	orr	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
   | Prsb(r1, r2, so) ->
       fprintf oc "	rsb	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
-  | Pstr(r1, r2, sa) ->
+  | Pstr(r1, r2, sa) | Pstr_a(r1, r2, sa) ->
       fprintf oc "	str	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa;
       begin match r1, r2, sa with
       | IR14, IR13, SAimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n)
@@ -786,19 +795,53 @@ let print_instruction oc = function
   | Pftouizd(r1, r2) ->
       fprintf oc "	ftouizd	%a, %a\n" freg_single FR6 freg r2;
       fprintf oc "	fmrs	%a, %a\n" ireg r1 freg_single FR6; 2
+  | Pfabss(r1, r2) ->
+      fprintf oc "	fabss	%a, %a\n" freg_single r1 freg_single r2; 1
+  | Pfnegs(r1, r2) ->
+      fprintf oc "	fnegs	%a, %a\n" freg_single r1 freg_single r2; 1
+  | Pfadds(r1, r2, r3) ->
+      fprintf oc "	fadds	%a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+  | Pfdivs(r1, r2, r3) ->
+      fprintf oc "	fdivs	%a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+  | Pfmuls(r1, r2, r3) ->
+      fprintf oc "	fmuls	%a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+  | Pfsubs(r1, r2, r3) ->
+      fprintf oc "	fsubs	%a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+  | Pflis(r1, f) ->
+      (* We could make good use of the fconsts instruction, but it's available
+         in VFD v3 and up, not in v1 nor v2 *)
+      let lbl = label_float32 f in
+      fprintf oc "	flds	%a, .L%d @ %.12g\n" freg_single r1 lbl (camlfloat_of_coqfloat32 f); 1
+  | Pfcmps(r1, r2) ->
+      fprintf oc "	fcmps	%a, %a\n" freg_single r1 freg_single r2;
+      fprintf oc "	fmstat\n"; 2
+  | Pfcmpzs(r1) ->
+      fprintf oc "	fcmpzs	%a\n" freg_single r1;
+      fprintf oc "	fmstat\n"; 2
+  | Pfsitos(r1, r2) ->
+      fprintf oc "	fmsr	%a, %a\n" freg_single r1 ireg r2;
+      fprintf oc "	fsitos	%a, %a\n" freg_single r1 freg_single r1; 2
+  | Pfuitos(r1, r2) ->
+      fprintf oc "	fmsr	%a, %a\n" freg_single r1 ireg r2;
+      fprintf oc "	fuitos	%a, %a\n" freg_single r1 freg_single r1; 2
+  | Pftosizs(r1, r2) ->
+      fprintf oc "	ftosizs	%a, %a\n" freg_single FR6 freg_single r2;
+      fprintf oc "	fmrs	%a, %a\n" ireg r1 freg_single FR6; 2
+  | Pftouizs(r1, r2) ->
+      fprintf oc "	ftouizs	%a, %a\n" freg_single FR6 freg_single r2;
+      fprintf oc "	fmrs	%a, %a\n" ireg r1 freg_single FR6; 2
   | Pfcvtsd(r1, r2) ->
-      fprintf oc "	fcvtsd	%a, %a\n" freg_single r1 freg r2;
-      fprintf oc "	fcvtds	%a, %a\n" freg r1 freg_single r1; 2
-  | Pfldd(r1, r2, n) ->
+      fprintf oc "	fcvtsd	%a, %a\n" freg_single r1 freg r2; 1
+  | Pfcvtds(r1, r2) ->
+      fprintf oc "	fcvtds	%a, %a\n" freg r1 freg_single r2; 1
+  | Pfldd(r1, r2, n) | Pfldd_a(r1, r2, n) ->
       fprintf oc "	fldd	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
   | Pflds(r1, r2, n) ->
-      fprintf oc "	flds	%a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n;
-      fprintf oc "	fcvtds	%a, %a\n" freg r1 freg_single r1; 2
-  | Pfstd(r1, r2, n) ->
+      fprintf oc "	flds	%a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1
+  | Pfstd(r1, r2, n) | Pfstd_a(r1, r2, n) ->
       fprintf oc "	fstd	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
   | Pfsts(r1, r2, n) ->
-      fprintf oc "	fcvtsd	%a, %a\n" freg_single FR6 freg r1;
-      fprintf oc "	fsts	%a, [%a, #%a]\n" freg_single FR6 ireg r2 coqint n; 2
+      fprintf oc "	fsts	%a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1
   (* Pseudo-instructions *)
   | Pallocframe(sz, ofs) ->
       fprintf oc "	mov	r12, sp\n";
@@ -927,10 +970,10 @@ let print_init oc = function
   | Init_int64 n ->
       fprintf oc "	.quad	%Ld\n" (camlint64_of_coqint n)
   | Init_float32 n ->
-      fprintf oc "	.word	0x%lx %s %.15g \n" (camlint_of_coqint (Floats.Float.bits_of_single n))
+      fprintf oc "	.word	0x%lx %s %.15g \n" (camlint_of_coqint (Floats.Float32.to_bits n))
 	comment (camlfloat_of_coqfloat n)
   | Init_float64 n ->
-      fprintf oc "	.quad	%Ld %s %.18g\n" (camlint64_of_coqint (Floats.Float.bits_of_double n))
+      fprintf oc "	.quad	%Ld %s %.18g\n" (camlint64_of_coqint (Floats.Float.to_bits n))
 	comment (camlfloat_of_coqfloat n)
   | Init_space n ->
       if Z.gt n Z.zero then
diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp
index e4005c9..ad8a945 100644
--- a/arm/SelectOp.vp
+++ b/arm/SelectOp.vp
@@ -321,6 +321,12 @@ Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
 Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
 Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
 
+Definition negfs (e: expr) :=  Eop Onegfs (e ::: Enil).
+Definition absfs (e: expr) :=  Eop Oabsfs (e ::: Enil).
+Definition addfs (e1 e2: expr) :=  Eop Oaddfs (e1 ::: e2 ::: Enil).
+Definition subfs (e1 e2: expr) :=  Eop Osubfs (e1 ::: e2 ::: Enil).
+Definition mulfs (e1 e2: expr) :=  Eop Omulfs (e1 ::: e2 ::: Enil).
+
 (** ** Comparisons *)
 
 Nondetfunction compimm (default: comparison -> int -> condition)
@@ -370,6 +376,9 @@ Nondetfunction compu (c: comparison) (e1: expr) (e2: expr) :=
 Definition compf (c: comparison) (e1: expr) (e2: expr) :=
   Eop (Ocmp (Ccompf c)) (e1 ::: e2 ::: Enil).
 
+Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
+  Eop (Ocmp (Ccompfs c)) (e1 ::: e2 ::: Enil).
+
 (** ** Integer conversions *)
 
 Definition cast8unsigned (e: expr) := andimm (Int.repr 255) e.
@@ -391,21 +400,38 @@ Nondetfunction cast16signed (e: expr) :=
 (** ** Floating-point conversions *)
 
 Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
+Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
+
 Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
 Definition intuoffloat (e: expr) := Eop Ointuoffloat (e ::: Enil).
 
 Nondetfunction floatofint (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.floatofint n)) Enil
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_int n)) Enil
   | _ => Eop Ofloatofint (e ::: Enil)
   end.
 
 Nondetfunction floatofintu (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.floatofintu n)) Enil
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ => Eop Ofloatofintu (e ::: Enil)
   end.
 
+Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
+Definition intuofsingle (e: expr) := Eop Ointuofsingle (e ::: Enil).
+
+Nondetfunction singleofint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_int n)) Enil
+  | _ => Eop Osingleofint (e ::: Enil)
+  end.
+
+Nondetfunction singleofintu (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_intu n)) Enil
+  | _ => Eop Osingleofintu (e ::: Enil)
+  end.
+
 (** ** Recognition of addressing modes for load and store operations *)
 
 (** We do not recognize the [Aindexed2] and [Aindexed2shift] modes
@@ -424,6 +450,8 @@ Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
   | Mint64 => false
   | Mfloat32 => false
   | Mfloat64 => false
+  | Many32 => true
+  | Many64 => false
   end.
 
 Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
@@ -436,6 +464,8 @@ Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
   | Mint64 => false
   | Mfloat32 => false
   | Mfloat64 => false
+  | Many32 => true
+  | Many64 => false
   end.
 
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v
index 1dd2c20..c68d227 100644
--- a/arm/SelectOpproof.v
+++ b/arm/SelectOpproof.v
@@ -598,6 +598,31 @@ Proof.
   red; intros; TrivialExists.
 Qed.
 
+Theorem eval_negfs: unary_constructor_sound negfs Val.negfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_absfs: unary_constructor_sound absfs Val.absfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_addfs: binary_constructor_sound addfs Val.addfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+ 
+Theorem eval_subfs: binary_constructor_sound subfs Val.subfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulfs: binary_constructor_sound mulfs Val.mulfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
 Section COMP_IMM.
 
 Variable default: comparison -> int -> condition.
@@ -693,6 +718,12 @@ Proof.
   intros; red; intros. unfold compf. TrivialExists.
 Qed.
 
+Theorem eval_compfs:
+  forall c, binary_constructor_sound (compfs c) (Val.cmpfs c).
+Proof.
+  intros; red; intros. unfold compfs. TrivialExists.
+Qed.
+
 Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8).
 Proof.
   red; intros until x. unfold cast8signed; case (cast8signed_match a); intros.
@@ -724,6 +755,11 @@ Proof.
   red; intros. unfold singleoffloat. TrivialExists.
 Qed.
 
+Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
+Proof.
+  red; intros. unfold floatofsingle. TrivialExists.
+Qed.
+
 Theorem eval_intoffloat:
   forall le a x y,
   eval_expr ge sp e m le a x ->
@@ -764,6 +800,46 @@ Proof.
   TrivialExists.
 Qed.
 
+Theorem eval_intofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold intofsingle. TrivialExists. 
+Qed.
+
+Theorem eval_singleofint:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofint x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofint. case (singleofint_match a); intros.
+  InvEval. simpl in H0. TrivialExists. 
+  TrivialExists. 
+Qed.
+
+Theorem eval_intuofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intuofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold intuofsingle. TrivialExists. 
+Qed.
+
+Theorem eval_singleofintu:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofintu x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofintu. case (singleofintu_match a); intros.
+  InvEval. simpl in H0. TrivialExists. 
+  TrivialExists.
+Qed.
+
 Theorem eval_addressing:
   forall le chunk a v b ofs,
   eval_expr ge sp e m le a v ->
diff --git a/arm/ValueAOp.v b/arm/ValueAOp.v
index b312361..a14d6b9 100644
--- a/arm/ValueAOp.v
+++ b/arm/ValueAOp.v
@@ -44,6 +44,10 @@ Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
   | Ccompfzero c, v1 :: nil => cmpf_bool c v1 (F Float.zero)
   | Cnotcompfzero c, v1 :: nil => cnot (cmpf_bool c v1 (F Float.zero))
+  | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2)
+  | Ccompfszero c, v1 :: nil => cmpfs_bool c v1 (FS Float32.zero)
+  | Cnotcompfszero c, v1 :: nil => cnot (cmpfs_bool c v1 (FS Float32.zero))
   | _, _ => Bnone
   end.
 
@@ -61,6 +65,7 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omove, v1::nil => v1
   | Ointconst n, nil => I n
   | Ofloatconst n, nil => if propagate_float_constants tt then F n else ftop
+  | Osingleconst n, nil => if propagate_float_constants tt then FS n else ftop
   | Oaddrsymbol id ofs, nil => Ptr (Gl id ofs)
   | Oaddrstack ofs, nil => Ptr (Stk ofs)
   | Ocast8signed, v1 :: nil => sign_ext 8 v1
@@ -102,11 +107,22 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Osubf, v1::v2::nil => subf v1 v2
   | Omulf, v1::v2::nil => mulf v1 v2
   | Odivf, v1::v2::nil => divf v1 v2
+  | Onegfs, v1::nil => negfs v1
+  | Oabsfs, v1::nil => absfs v1
+  | Oaddfs, v1::v2::nil => addfs v1 v2
+  | Osubfs, v1::v2::nil => subfs v1 v2
+  | Omulfs, v1::v2::nil => mulfs v1 v2
+  | Odivfs, v1::v2::nil => divfs v1 v2
   | Osingleoffloat, v1::nil => singleoffloat v1
+  | Ofloatofsingle, v1::nil => floatofsingle v1
   | Ointoffloat, v1::nil => intoffloat v1
   | Ointuoffloat, v1::nil => intuoffloat v1
   | Ofloatofint, v1::nil => floatofint v1
   | Ofloatofintu, v1::nil => floatofintu v1
+  | Ointofsingle, v1::nil => intofsingle v1
+  | Ointuofsingle, v1::nil => intuofsingle v1
+  | Osingleofint, v1::nil => singleofint v1
+  | Osingleofintu, v1::nil => singleofintu v1
   | Omakelong, v1::v2::nil => longofwords v1 v2
   | Olowlong, v1::nil => loword v1
   | Ohighlong, v1::nil => hiword v1
@@ -185,6 +201,7 @@ Proof.
   unfold eval_operation, eval_static_operation; intros;
   destruct op; InvHyps; eauto with va.
   destruct (propagate_float_constants tt); constructor.
+  destruct (propagate_float_constants tt); constructor.
   rewrite Int.add_zero_l; eauto with va.
   fold (Val.sub (Vint i) a1). auto with va.
   apply of_optbool_sound. eapply eval_static_condition_sound; eauto. 
diff --git a/arm/eabi/Conventions1.v b/arm/eabi/Conventions1.v
index c02af1a..c26d29e 100644
--- a/arm/eabi/Conventions1.v
+++ b/arm/eabi/Conventions1.v
@@ -15,6 +15,7 @@
 
 Require Import Coqlib.
 Require Import AST.
+Require Import Events.
 Require Import Locations.
 
 (** * Classification of machine registers *)
@@ -178,13 +179,13 @@ Proof.
 Qed.
 
 Lemma int_callee_save_type:
-  forall r, In r int_callee_save_regs -> mreg_type r = Tint.
+  forall r, In r int_callee_save_regs -> mreg_type r = Tany32.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
 
 Lemma float_callee_save_type:
-  forall r, In r float_callee_save_regs -> mreg_type r = Tfloat.
+  forall r, In r float_callee_save_regs -> mreg_type r = Tany64.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
@@ -235,12 +236,21 @@ Qed.
 Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => R0 :: nil
-  | Some Tint => R0 :: nil
-  | Some (Tfloat | Tsingle) => F0 :: nil
+  | Some (Tint | Tany32) => R0 :: nil
+  | Some (Tfloat | Tsingle | Tany64) => F0 :: nil
   | Some Tlong => R1 :: R0 :: nil
   end.
 
-(** The result location is a caller-save register or a temporary *)
+(** The result registers have types compatible with that given in the signature. *)
+
+Lemma loc_result_type:
+  forall sig,
+  subtype_list (proj_sig_res' sig) (map mreg_type (loc_result sig)) = true.
+Proof.
+  intros. unfold proj_sig_res', loc_result. destruct (sig_res sig) as [[]|]; auto.
+Qed.
+
+(** The result locations are caller-save registers *)
 
 Lemma loc_result_caller_save:
   forall (s: signature) (r: mreg), 
@@ -292,12 +302,12 @@ Definition sreg_param (n: Z) : mreg :=
 Fixpoint loc_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys =>
-      (if zle 0 ofs then S Outgoing ofs Tint else R (ireg_param ofs))
+  | (Tint | Tany32) as ty :: tys =>
+      (if zle 0 ofs then S Outgoing ofs ty else R (ireg_param ofs))
       :: loc_arguments_rec tys (ofs + 1)
-  | Tfloat :: tys =>
+  | (Tfloat | Tany64) as ty :: tys =>
       let ofs := align ofs 2 in
-      (if zle 0 ofs then S Outgoing ofs Tfloat else R (freg_param ofs))
+      (if zle 0 ofs then S Outgoing ofs ty else R (freg_param ofs))
       :: loc_arguments_rec tys (ofs + 2)
   | Tsingle :: tys =>
       (if zle 0 ofs then S Outgoing ofs Tsingle else R (sreg_param ofs))
@@ -321,8 +331,8 @@ Definition loc_arguments (s: signature) : list loc :=
 Fixpoint size_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | (Tint | Tsingle) :: tys => size_arguments_rec tys (ofs + 1)
-  | (Tfloat | Tlong) :: tys => size_arguments_rec tys (align ofs 2 + 2)
+  | (Tint | Tsingle | Tany32) :: tys => size_arguments_rec tys (ofs + 1)
+  | (Tfloat | Tlong | Tany64) :: tys => size_arguments_rec tys (align ofs 2 + 2)
   end.
 
 Definition size_arguments (s: signature) : Z :=
@@ -404,6 +414,19 @@ Proof.
   split. omega. split. omega. congruence.
   apply sreg_param_caller_save.
   exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+- (* Tany32 *)
+  destruct H.
+  subst l. destruct (zle 0 ofs).
+  split. omega. split. omega. congruence.
+  apply ireg_param_caller_save.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+- (* Tany64 *)
+  assert (ofs <= align ofs 2) by (apply align_le; omega).
+  destruct H.
+  subst l. destruct (zle 0 (align ofs 2)).
+  split. omega. split. auto. congruence.
+  apply freg_param_caller_save.
+  exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -433,6 +456,9 @@ Proof.
   assert (ofs <= align ofs 2) by (apply align_le; omega).
   apply Zle_trans with (align ofs 2 + 2); auto; omega.
   apply Zle_trans with (ofs + 1); auto; omega.
+  apply Zle_trans with (ofs + 1); auto; omega.
+  assert (ofs <= align ofs 2) by (apply align_le; omega).
+  apply Zle_trans with (align ofs 2 + 2); auto; omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -474,6 +500,10 @@ Proof.
     eapply Zle_trans. 2: apply H0. simpl typesize; omega. omega.
   - (* Tsingle *)
     destruct H1; auto. destruct (zle 0 ofs0); inv H1. apply H0. omega. 
+  - (* Tany32 *)
+    destruct H1; auto. destruct (zle 0 ofs0); inv H1. apply H0. omega. 
+  - (* Tany64 *)
+    destruct H1; auto. destruct (zle 0 (align ofs0 2)); inv H1. apply H0. omega.
   }
   unfold size_arguments. apply H1. auto.  
 Qed.
diff --git a/arm/hardfloat/Conventions1.v b/arm/hardfloat/Conventions1.v
index e3875e7..40a761c 100644
--- a/arm/hardfloat/Conventions1.v
+++ b/arm/hardfloat/Conventions1.v
@@ -15,6 +15,7 @@
 
 Require Import Coqlib.
 Require Import AST.
+Require Import Events.
 Require Import Locations.
 
 (** * Classification of machine registers *)
@@ -178,13 +179,13 @@ Proof.
 Qed.
 
 Lemma int_callee_save_type:
-  forall r, In r int_callee_save_regs -> mreg_type r = Tint.
+  forall r, In r int_callee_save_regs -> mreg_type r = Tany32.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
 
 Lemma float_callee_save_type:
-  forall r, In r float_callee_save_regs -> mreg_type r = Tfloat.
+  forall r, In r float_callee_save_regs -> mreg_type r = Tany64.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
@@ -235,12 +236,21 @@ Qed.
 Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => R0 :: nil
-  | Some Tint => R0 :: nil
-  | Some (Tfloat | Tsingle) => F0 :: nil
+  | Some (Tint | Tany32) => R0 :: nil
+  | Some (Tfloat | Tsingle | Tany64) => F0 :: nil
   | Some Tlong => R1 :: R0 :: nil
   end.
 
-(** The result location is a caller-save register or a temporary *)
+(** The result registers have types compatible with that given in the signature. *)
+
+Lemma loc_result_type:
+  forall sig,
+  subtype_list (proj_sig_res' sig) (map mreg_type (loc_result sig)) = true.
+Proof.
+  intros. unfold proj_sig_res', loc_result. destruct (sig_res sig) as [[]|]; auto.
+Qed.
+
+(** The result locations are caller-save registers *)
 
 Lemma loc_result_caller_save:
   forall (s: signature) (r: mreg), 
@@ -286,15 +296,15 @@ Fixpoint loc_arguments_rec
      (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys =>
+  | (Tint | Tany32) as ty :: tys =>
       if zlt ir 4
       then R (ireg_param ir) :: loc_arguments_rec tys (ir + 1) fr ofs
-      else S Outgoing ofs Tint :: loc_arguments_rec tys ir fr (ofs + 1)
-  | Tfloat :: tys =>
+      else S Outgoing ofs ty :: loc_arguments_rec tys ir fr (ofs + 1)
+  | (Tfloat | Tany64) as ty :: tys =>
       if zlt fr 8
       then R (freg_param fr) :: loc_arguments_rec tys ir (fr + 1) ofs
       else let ofs := align ofs 2 in
-           S Outgoing ofs Tfloat :: loc_arguments_rec tys ir fr (ofs + 2)
+           S Outgoing ofs ty :: loc_arguments_rec tys ir fr (ofs + 2)
   | Tsingle :: tys =>
       if zlt fr 8
       then R (freg_param fr) :: loc_arguments_rec tys ir (fr + 1) ofs
@@ -325,12 +335,12 @@ Fixpoint loc_arguments_vararg
      (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys =>
-      (if zlt ofs 0 then R (ireg_param (ofs + 4)) else S Outgoing ofs Tint)
+  | (Tint|Tany32) as ty :: tys =>
+      (if zlt ofs 0 then R (ireg_param (ofs + 4)) else S Outgoing ofs ty)
       :: loc_arguments_vararg tys (ofs + 1)
-  | Tfloat :: tys =>
+  | (Tfloat|Tany64) as ty :: tys =>
       let ofs := align ofs 2 in
-      (if zlt ofs 0 then R (freg_param (ofs + 4)) else S Outgoing ofs Tfloat)
+      (if zlt ofs 0 then R (freg_param (ofs + 4)) else S Outgoing ofs ty)
       :: loc_arguments_vararg tys (ofs + 2)
   | Tsingle :: tys =>
       (if zlt ofs 0 then R (freg_param (ofs + 4)) else S Outgoing ofs Tsingle)
@@ -356,11 +366,11 @@ Definition loc_arguments (s: signature) : list loc :=
 Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | Tint :: tys =>
+  | (Tint|Tany32) :: tys =>
       if zlt ir 4
       then size_arguments_rec tys (ir + 1) fr ofs
       else size_arguments_rec tys ir fr (ofs + 1)
-  | Tfloat :: tys =>
+  | (Tfloat|Tany64) :: tys =>
       if zlt fr 8
       then size_arguments_rec tys ir (fr + 1) ofs
       else size_arguments_rec tys ir fr (align ofs 2 + 2)
@@ -378,8 +388,8 @@ Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
 Fixpoint size_arguments_vararg (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => Zmax 0 ofs
-  | (Tint | Tsingle) :: tys => size_arguments_vararg tys (ofs + 1)
-  | (Tfloat | Tlong) :: tys => size_arguments_vararg tys (align ofs 2 + 2)
+  | (Tint | Tsingle | Tany32) :: tys => size_arguments_vararg tys (ofs + 1)
+  | (Tfloat | Tlong | Tany64) :: tys => size_arguments_vararg tys (align ofs 2 + 2)
   end.
 
 Definition size_arguments (s: signature) : Z :=
@@ -469,6 +479,19 @@ Proof.
   eapply IHtyl; eauto.
   subst. split; [omega | congruence].
   eapply INCR. eapply IHtyl; eauto. omega.
+- (* any32 *)
+  destruct (zlt ir 4); destruct H.
+  subst. left; apply ireg_param_in_params.
+  eapply IHtyl; eauto.
+  subst. split; [omega | congruence].
+  eapply INCR. eapply IHtyl; eauto. omega.
+- (* any64 *)
+  destruct (zlt fr 8); destruct H.
+  subst. right; apply freg_param_in_params.
+  eapply IHtyl; eauto.
+  subst. split. apply Zle_ge. apply align_le. omega. congruence.
+  eapply INCR. eapply IHtyl; eauto.
+  apply Zle_trans with (align ofs 2). apply align_le; omega. omega.
 Qed.
 
 Remark loc_arguments_vararg_charact:
@@ -530,6 +553,20 @@ Proof.
   right; apply freg_param_in_params.
   split. xomega. congruence.
   eapply INCR. eapply IHtyl; eauto. omega.
+- (* any32 *)
+  destruct H. 
+  destruct (zlt ofs 0); subst l. 
+  left; apply ireg_param_in_params.
+  split. xomega. congruence.
+  eapply INCR. eapply IHtyl; eauto. omega.
+- (* any64 *)
+  set (ofs' := align ofs 2) in *.
+  assert (ofs <= ofs') by (apply align_le; omega).
+  destruct H.
+  destruct (zlt ofs' 0); subst l.
+  right; apply freg_param_in_params.
+  split. xomega. congruence.
+  eapply INCR. eapply IHtyl; eauto. omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -569,6 +606,10 @@ Proof.
   apply Zle_trans with (align ofs0 2 + 2); auto; omega.
   destruct (zlt fr 8); eauto.
   apply Zle_trans with (ofs0 + 1); eauto. omega.
+  destruct (zlt ir 4); eauto. apply Zle_trans with (ofs0 + 1); auto; omega.
+  destruct (zlt fr 8); eauto.
+  apply Zle_trans with (align ofs0 2). apply align_le; omega.
+  apply Zle_trans with (align ofs0 2 + 2); auto; omega.
 Qed.
 
 Remark size_arguments_vararg_above:
@@ -582,6 +623,8 @@ Proof.
   assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
   assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
   xomega.
+  xomega.
+  assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
 Qed.
 
 Lemma size_arguments_above:
@@ -626,6 +669,18 @@ Proof.
   eauto.
   inv H. apply size_arguments_rec_above.
   eauto.
+- (* any32 *)
+  destruct (zlt ir 4); destruct H.
+  discriminate.
+  eauto.
+  inv H. apply size_arguments_rec_above. 
+  eauto.
+- (* any64 *)
+  destruct (zlt fr 8); destruct H.
+  discriminate.
+  eauto. 
+  inv H. apply size_arguments_rec_above. 
+  eauto.
 Qed.
 
 Lemma loc_arguments_vararg_bounded:
@@ -656,6 +711,14 @@ Proof.
   destruct H. 
   destruct (zlt ofs0 0); inv H. apply size_arguments_vararg_above. 
   eauto.
+- (* any32 *)
+  destruct H. 
+  destruct (zlt ofs0 0); inv H. apply size_arguments_vararg_above. 
+  eauto.
+- (* any64 *)
+  destruct H.
+  destruct (zlt (align ofs0 2) 0); inv H. apply size_arguments_vararg_above.
+  eauto.
 Qed.
 
 Lemma loc_arguments_bounded:
author	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2014-07-23 08:54:56 +0000
committer	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2014-07-23 08:54:56 +0000
commit	2a0168fea37b68ad14e2cb60bf215111e49d4870 (patch)
tree	2f59373790d8ce3a5df66ef7a692271cf0666c6c /arm
parent	00805153cf9b88aa07cc6694b17d93f5ba2e7de8 (diff)