Merge of "newspilling" branch:

- Support single-precision floats as first-class values - Introduce chunks Many32, Many64 and types Tany32, Tany64 to support saving and restoring registers without knowing the exact types (int/single/float) of their contents, just their sizes. - Memory model: generalize the opaque encoding of pointers to apply to any value, not just pointers, if chunks Many32/Many64 are selected. - More properties of FP arithmetic proved. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2537 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
author: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-07-23 08:54:56 +0000
committer: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2014-07-23 08:54:56 +0000
commit: 2a0168fea37b68ad14e2cb60bf215111e49d4870 (patch)
tree: 2f59373790d8ce3a5df66ef7a692271cf0666c6c /ia32
parent: 00805153cf9b88aa07cc6694b17d93f5ba2e7de8 (diff)
17 files changed, 698 insertions, 324 deletions
diff --git a/ia32/Archi.v b/ia32/Archi.v
index a2e136c..fff25cf 100644
--- a/ia32/Archi.v
+++ b/ia32/Archi.v
@@ -25,10 +25,19 @@ Definition big_endian := false.
 Notation align_int64 := 4%Z (only parsing).
 Notation align_float64 := 4%Z (only parsing).
 
-Program Definition default_pl : bool * nan_pl 53 := 
+Program Definition default_pl_64 : bool * nan_pl 53 := 
   (true, nat_iter 51 xO xH).
 
-Definition choose_binop_pl (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+Definition choose_binop_pl_64 (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
   false.                        (**r always choose first NaN *)
 
-Global Opaque big_endian default_pl choose_binop_pl.
+Program Definition default_pl_32 : bool * nan_pl 24 := 
+  (true, nat_iter 22 xO xH).
+
+Definition choose_binop_pl_32 (s1: bool) (pl1: nan_pl 24) (s2: bool) (pl2: nan_pl 24) :=
+  false.                        (**r always choose first NaN *)
+
+Global Opaque big_endian
+              default_pl_64 choose_binop_pl_64
+              default_pl_32 choose_binop_pl_32.
+
diff --git a/ia32/Asm.v b/ia32/Asm.v
index 2e5b8b9..15f80e4 100644
--- a/ia32/Asm.v
+++ b/ia32/Asm.v
@@ -118,10 +118,13 @@ Inductive instruction: Type :=
   | Pmovsd_fi (rd: freg) (n: float)     (**r (pseudo-instruction) *)
   | Pmovsd_fm (rd: freg) (a: addrmode)
   | Pmovsd_mf (a: addrmode) (r1: freg)
-  | Pfld_f (r1: freg)                   (**r [fld] from XMM register (pseudo) *)
-  | Pfld_m (a: addrmode)                (**r [fld] from memory *)
-  | Pfstp_f (rd: freg)                  (**r [fstp] to XMM register (pseudo) *)
-  | Pfstp_m (a: addrmode)               (**r [fstp] to memory *)
+  | Pmovss_fi (rd: freg) (n: float32)   (**r [movss] (single 32-bit float) *)
+  | Pmovss_fm (rd: freg) (a: addrmode)
+  | Pmovss_mf (a: addrmode) (r1: freg)
+  | Pfldl_m (a: addrmode)               (**r [fld] double precision *)
+  | Pfstpl_m (a: addrmode)              (**r [fstp] double precision *)
+  | Pflds_m (a: addrmode)               (**r [fld] simple precision *)
+  | Pfstps_m (a: addrmode)              (**r [fstp] simple precision *)
   | Pxchg_rr (r1: ireg) (r2: ireg)      (**r register-register exchange *)
   (** Moves with conversion *)
   | Pmovb_mr (a: addrmode) (rs: ireg)   (**r [mov] (8-bit int) *)
@@ -134,11 +137,12 @@ Inductive instruction: Type :=
   | Pmovzw_rm (rd: ireg) (a: addrmode)
   | Pmovsw_rr (rd: ireg) (rs: ireg)     (**r [movsw] (16-bit sign-extension) *)
   | Pmovsw_rm (rd: ireg) (a: addrmode)
-  | Pcvtss2sd_fm (rd: freg) (a: addrmode) (**r [cvtss2sd] (single float load) *)
-  | Pcvtsd2ss_ff (rd: freg) (r1: freg)  (**r conversion to single (pseudo) *)
-  | Pcvtsd2ss_mf (a: addrmode) (r1: freg) (**r [cvtsd2ss] (single float store *)
+  | Pcvtsd2ss_ff (rd: freg) (r1: freg)  (**r conversion to single float *)
+  | Pcvtss2sd_ff (rd: freg) (r1: freg)  (**r conversion to double float *)
   | Pcvttsd2si_rf (rd: ireg) (r1: freg) (**r double to signed int *)
   | Pcvtsi2sd_fr (rd: freg) (r1: ireg)  (**r signed int to double *)
+  | Pcvttss2si_rf (rd: ireg) (r1: freg) (**r single to signed int *)
+  | Pcvtsi2ss_fr (rd: freg) (r1: ireg)  (**r signed int to single *)
   (** Integer arithmetic *)
   | Plea (rd: ireg) (a: addrmode)
   | Pneg (rd: ireg)
@@ -180,6 +184,14 @@ Inductive instruction: Type :=
   | Pabsd (rd: freg)
   | Pcomisd_ff (r1 r2: freg)
   | Pxorpd_f (rd: freg)	              (**r [xor] with self = set to zero *)
+  | Padds_ff (rd: freg) (r1: freg)
+  | Psubs_ff (rd: freg) (r1: freg)
+  | Pmuls_ff (rd: freg) (r1: freg)
+  | Pdivs_ff (rd: freg) (r1: freg)
+  | Pnegs (rd: freg)
+  | Pabss (rd: freg)
+  | Pcomiss_ff (r1 r2: freg)
+  | Pxorps_f (rd: freg)	              (**r [xor] with self = set to zero *)
   (** Branches and calls *)
   | Pjmp_l (l: label)
   | Pjmp_s (symb: ident) (sg: signature)
@@ -190,6 +202,11 @@ Inductive instruction: Type :=
   | Pcall_s (symb: ident) (sg: signature)
   | Pcall_r (r: ireg) (sg: signature)
   | Pret
+  (** Saving and restoring registers *)
+  | Pmov_rm_a (rd: ireg) (a: addrmode)  (**r like [Pmov_rm], using [Many32] chunk *)
+  | Pmov_mr_a (a: addrmode) (rs: ireg)  (**r like [Pmov_mr], using [Many32] chunk *)
+  | Pmovsd_fm_a (rd: freg) (a: addrmode) (**r like [Pmovsd_fm], using [Many64] chunk *)
+  | Pmovsd_mf_a (a: addrmode) (r1: freg) (**r like [Pmovsd_mf], using [Many64] chunk *)
   (** Pseudo-instructions *)
   | Plabel(l: label)
   | Pallocframe(sz: Z)(ofs_ra ofs_link: int)
@@ -330,6 +347,18 @@ Definition compare_floats (vx vy: val) (rs: regset) : regset :=
       undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs
   end.
 
+Definition compare_floats32 (vx vy: val) (rs: regset) : regset :=
+  match vx, vy with
+  | Vsingle x, Vsingle y =>
+      rs #ZF  <- (Val.of_bool (negb (Float32.cmp Cne x y)))
+         #CF  <- (Val.of_bool (negb (Float32.cmp Cge x y)))
+         #PF  <- (Val.of_bool (negb (Float32.cmp Ceq x y || Float32.cmp Clt x y || Float32.cmp Cgt x y)))
+         #SF  <- Vundef
+         #OF  <- Vundef
+  | _, _ =>
+      undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs
+  end.
+
 (** Testing a condition *)
 
 Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
@@ -484,14 +513,20 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       exec_load Mfloat64 m a rs rd
   | Pmovsd_mf a r1 =>
       exec_store Mfloat64 m a rs r1 nil
-  | Pfld_f r1 =>
-      Next (nextinstr (rs#ST0 <- (rs r1))) m
-  | Pfld_m a =>
+  | Pmovss_fi rd n =>
+      Next (nextinstr (rs#rd <- (Vsingle n))) m
+  | Pmovss_fm rd a =>
+      exec_load Mfloat32 m a rs rd
+  | Pmovss_mf a r1 =>
+      exec_store Mfloat32 m a rs r1 nil
+  | Pfldl_m a =>
       exec_load Mfloat64 m a rs ST0
-  | Pfstp_f rd =>
-      Next (nextinstr (rs#rd <- (rs ST0) #ST0 <- Vundef)) m
-  | Pfstp_m a =>
+  | Pfstpl_m a =>
       exec_store Mfloat64 m a rs ST0 (ST0 :: nil)
+  | Pflds_m a =>
+      exec_load Mfloat32 m a rs ST0
+  | Pfstps_m a =>
+      exec_store Mfloat32 m a rs ST0 (ST0 :: nil)
   | Pxchg_rr r1 r2 =>
       Next (nextinstr (rs#r1 <- (rs r2) #r2 <- (rs r1))) m
   (** Moves with conversion *)
@@ -515,16 +550,18 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#rd <- (Val.sign_ext 16 rs#r1))) m
   | Pmovsw_rm rd a =>
       exec_load Mint16signed m a rs rd
-  | Pcvtss2sd_fm rd a =>
-      exec_load Mfloat32 m a rs rd
   | Pcvtsd2ss_ff rd r1 =>
       Next (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m
-  | Pcvtsd2ss_mf a r1 =>
-      exec_store Mfloat32 m a rs r1 (FR XMM7 :: nil)
+  | Pcvtss2sd_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.floatofsingle rs#r1))) m
   | Pcvttsd2si_rf rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
   | Pcvtsi2sd_fr rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.floatofint rs#r1)))) m
+  | Pcvttss2si_rf rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.intofsingle rs#r1)))) m
+  | Pcvtsi2ss_fr rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.maketotal (Val.singleofint rs#r1)))) m
   (** Integer arithmetic *)
   | Plea rd a =>
       Next (nextinstr (rs#rd <- (eval_addrmode a rs))) m
@@ -604,7 +641,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       end
   | Psetcc c rd =>
       Next (nextinstr (rs#rd <- (Val.of_optbool (eval_testcond c rs)))) m
-  (** Arithmetic operations over floats *)
+  (** Arithmetic operations over double-precision floats *)
   | Paddd_ff rd r1 =>
       Next (nextinstr (rs#rd <- (Val.addf rs#rd rs#r1))) m
   | Psubd_ff rd r1 =>
@@ -621,6 +658,23 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (compare_floats (rs r1) (rs r2) rs)) m
   | Pxorpd_f rd =>
       Next (nextinstr_nf (rs#rd <- (Vfloat Float.zero))) m
+  (** Arithmetic operations over single-precision floats *)
+  | Padds_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.addfs rs#rd rs#r1))) m
+  | Psubs_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.subfs rs#rd rs#r1))) m
+  | Pmuls_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.mulfs rs#rd rs#r1))) m
+  | Pdivs_ff rd r1 =>
+      Next (nextinstr (rs#rd <- (Val.divfs rs#rd rs#r1))) m
+  | Pnegs rd =>
+      Next (nextinstr (rs#rd <- (Val.negfs rs#rd))) m
+  | Pabss rd =>
+      Next (nextinstr (rs#rd <- (Val.absfs rs#rd))) m
+  | Pcomiss_ff r1 r2 =>
+      Next (nextinstr (compare_floats32 (rs r1) (rs r2) rs)) m
+  | Pxorps_f rd =>
+      Next (nextinstr_nf (rs#rd <- (Vsingle Float32.zero))) m
   (** Branches and calls *)
   | Pjmp_l lbl =>
       goto_label f lbl rs m
@@ -655,6 +709,15 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (rs#RA <- (Val.add rs#PC Vone) #PC <- (rs r)) m
   | Pret =>
       Next (rs#PC <- (rs#RA)) m
+  (** Saving and restoring registers *)
+  | Pmov_rm_a rd a =>
+      exec_load Many32 m a rs rd
+  | Pmov_mr_a a r1 =>
+      exec_store Many32 m a rs r1 nil
+  | Pmovsd_fm_a rd a =>
+      exec_load Many64 m a rs rd
+  | Pmovsd_mf_a a r1 =>
+      exec_store Many64 m a rs r1 nil
   (** Pseudo-instructions *)
   | Plabel lbl =>
       Next (nextinstr rs) m
diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v
index f92d72c..9c0a76e 100644
--- a/ia32/Asmgen.v
+++ b/ia32/Asmgen.v
@@ -50,8 +50,6 @@ Definition mk_mov (rd rs: preg) (k: code) : res code :=
   match rd, rs with
   | IR rd, IR rs => OK (Pmov_rr rd rs :: k)
   | FR rd, FR rs => OK (Pmovsd_ff rd rs :: k)
-  | ST0, FR rs => OK (Pfld_f rs :: k)
-  | FR rd, ST0 => OK (Pfstp_f rd :: k)
   | _, _ => Error(msg "Asmgen.mk_mov")
   end.
 
@@ -90,42 +88,30 @@ Definition mk_smallstore (sto: addrmode -> ireg ->instruction)
   else
     OK (Pmov_rr EAX rs :: sto addr EAX :: k).
 
-(** Accessing slots in the stack frame.  *)
+(** Accessing slots in the stack frame. *)
 
 Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of dst;
-      OK (Pmov_rm r (Addrmode (Some base) None (inl _ ofs)) :: k)
-  | Tfloat =>
-      match preg_of dst with
-      | FR r => OK (Pmovsd_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
-      | ST0  => OK (Pfld_m (Addrmode (Some base) None (inl _ ofs)) :: k)
-      | _ => Error (msg "Asmgen.loadind")
-      end
-  | Tsingle =>
-      do r <- freg_of dst;
-      OK (Pcvtss2sd_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
-  | Tlong =>
-      Error (msg "Asmgen.loadind")
+  match ty, preg_of dst with
+  | Tint, IR r => OK (Pmov_rm r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tsingle, FR r => OK (Pmovss_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tsingle, ST0  => OK (Pflds_m (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tfloat, FR r => OK (Pmovsd_fm r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tfloat, ST0  => OK (Pfldl_m (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tany32, IR r => OK (Pmov_rm_a r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tany64, FR r => OK (Pmovsd_fm_a r (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | _, _ => Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
-  match ty with
-  | Tint =>
-      do r <- ireg_of src;
-      OK (Pmov_mr (Addrmode (Some base) None (inl _ ofs)) r :: k)
-  | Tfloat =>
-      match preg_of src with
-      | FR r => OK (Pmovsd_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
-      | ST0  => OK (Pfstp_m (Addrmode (Some base) None (inl _ ofs)) :: k)
-      | _ => Error (msg "Asmgen.loadind")
-      end
-  | Tsingle =>
-      do r <- freg_of src;
-      OK (Pcvtsd2ss_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
-  | Tlong =>
-      Error (msg "Asmgen.storeind")
+  match ty, preg_of src with
+  | Tint, IR r => OK (Pmov_mr (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | Tsingle, FR r => OK (Pmovss_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | Tsingle, ST0 => OK (Pfstps_m (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tfloat, FR r => OK (Pmovsd_mf (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | Tfloat, ST0  => OK (Pfstpl_m (Addrmode (Some base) None (inl _ ofs)) :: k)
+  | Tany32, IR r => OK (Pmov_mr_a (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | Tany64, FR r => OK (Pmovsd_mf_a (Addrmode (Some base) None (inl _ ofs)) r :: k)
+  | _, _ => Error (msg "Asmgen.storeind")
   end.
 
 (** Translation of addressing modes *)
@@ -163,6 +149,12 @@ Definition floatcomp (cmp: comparison) (r1 r2: freg) : instruction :=
   | Ceq | Cne | Cgt | Cge => Pcomisd_ff r1 r2
   end.
 
+Definition floatcomp32 (cmp: comparison) (r1 r2: freg) : instruction :=
+  match cmp with
+  | Clt | Cle => Pcomiss_ff r2 r1
+  | Ceq | Cne | Cgt | Cge => Pcomiss_ff r1 r2
+  end.
+
 (** Translation of a condition.  Prepends to [k] the instructions
   that evaluate the condition and leave its boolean result in bits
   of the condition register. *)
@@ -183,6 +175,10 @@ Definition transl_cond
       do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp cmp r1 r2 :: k)
   | Cnotcompf cmp, a1 :: a2 :: nil =>
       do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp cmp r1 r2 :: k)
+  | Ccompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp32 cmp r1 r2 :: k)
+  | Cnotcompfs cmp, a1 :: a2 :: nil =>
+      do r1 <- freg_of a1; do r2 <- freg_of a2; OK (floatcomp32 cmp r1 r2 :: k)
   | Cmaskzero n, a1 :: nil =>
       do r1 <- ireg_of a1; OK (Ptest_ri r1 n :: k)
   | Cmasknotzero n, a1 :: nil =>
@@ -224,7 +220,7 @@ Definition testcond_for_condition (cond: condition) : extcond :=
   | Ccompu c => Cond_base(testcond_for_unsigned_comparison c)
   | Ccompimm c n => Cond_base(testcond_for_signed_comparison c)
   | Ccompuimm c n => Cond_base(testcond_for_unsigned_comparison c)
-  | Ccompf c =>
+  | Ccompf c | Ccompfs c =>
       match c with
       | Ceq => Cond_and Cond_np Cond_e
       | Cne => Cond_or Cond_p Cond_ne
@@ -233,7 +229,7 @@ Definition testcond_for_condition (cond: condition) : extcond :=
       | Cgt => Cond_base Cond_a
       | Cge => Cond_base Cond_ae
       end
-  | Cnotcompf c =>
+  | Cnotcompf c | Cnotcompfs c =>
       match c with
       | Ceq => Cond_or Cond_p Cond_ne
       | Cne => Cond_and Cond_np Cond_e
@@ -288,6 +284,9 @@ Definition transl_op
   | Ofloatconst f, nil =>
       do r <- freg_of res; 
       OK ((if Float.eq_dec f Float.zero then Pxorpd_f r else Pmovsd_fi r f) :: k)
+  | Osingleconst f, nil =>
+      do r <- freg_of res; 
+      OK ((if Float32.eq_dec f Float32.zero then Pxorps_f r else Pmovss_fi r f) :: k)
   | Oindirectsymbol id, nil =>
       do r <- ireg_of res;
       OK (Pmov_ra r id :: k)
@@ -412,12 +411,36 @@ Definition transl_op
   | Odivf, a1 :: a2 :: nil =>
       assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivd_ff r r2 :: k)
+  | Onegfs, a1 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; OK (Pnegs r :: k)
+  | Oabsfs, a1 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; OK (Pabss r :: k)
+  | Oaddfs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; do r2 <- freg_of a2; OK (Padds_ff r r2 :: k)
+  | Osubfs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; do r2 <- freg_of a2; OK (Psubs_ff r r2 :: k)
+  | Omulfs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; do r2 <- freg_of a2; OK (Pmuls_ff r r2 :: k)
+  | Odivfs, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivs_ff r r2 :: k)
   | Osingleoffloat, a1 :: nil =>
       do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtsd2ss_ff r r1 :: k)
+  | Ofloatofsingle, a1 :: nil =>
+      do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtss2sd_ff r r1 :: k)
   | Ointoffloat, a1 :: nil =>
       do r <- ireg_of res; do r1 <- freg_of a1; OK (Pcvttsd2si_rf r r1 :: k)
   | Ofloatofint, a1 :: nil =>
       do r <- freg_of res; do r1 <- ireg_of a1; OK (Pcvtsi2sd_fr r r1 :: k)
+  | Ointofsingle, a1 :: nil =>
+      do r <- ireg_of res; do r1 <- freg_of a1; OK (Pcvttss2si_rf r r1 :: k)
+  | Osingleofint, a1 :: nil =>
+      do r <- freg_of res; do r1 <- ireg_of a1; OK (Pcvtsi2ss_fr r r1 :: k)
   | Ocmp c, args =>
       do r <- ireg_of res;
       transl_cond c args (mk_setcc (testcond_for_condition c) r k)
@@ -443,10 +466,10 @@ Definition transl_load (chunk: memory_chunk)
   | Mint32 =>
       do r <- ireg_of dest; OK(Pmov_rm r am :: k)
   | Mfloat32 =>
-      do r <- freg_of dest; OK(Pcvtss2sd_fm r am :: k)
+      do r <- freg_of dest; OK(Pmovss_fm r am :: k)
   | Mfloat64 =>
       do r <- freg_of dest; OK(Pmovsd_fm r am :: k)
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_load")
   end.
 
@@ -462,10 +485,10 @@ Definition transl_store (chunk: memory_chunk)
   | Mint32 =>
       do r <- ireg_of src; OK(Pmov_mr am r :: k)
   | Mfloat32 =>
-      do r <- freg_of src; OK(Pcvtsd2ss_mf am r :: k)
+      do r <- freg_of src; OK(Pmovss_mf am r :: k)
   | Mfloat64 =>
       do r <- freg_of src; OK(Pmovsd_mf am r :: k)
-  | Mint64 =>
+  | _ =>
       Error (msg "Asmgen.transl_store")
   end.
 
diff --git a/ia32/Asmgenproof.v b/ia32/Asmgenproof.v
index 881375f..eba710a 100644
--- a/ia32/Asmgenproof.v
+++ b/ia32/Asmgenproof.v
@@ -168,11 +168,7 @@ Remark loadind_label:
   loadind base ofs ty dst k = OK c ->
   tail_nolabel k c.
 Proof.
-  unfold loadind; intros. destruct ty.
-  TailNoLabel.
-  destruct (preg_of dst); TailNoLabel.
-  discriminate.
-  TailNoLabel.
+  unfold loadind; intros. destruct ty; try discriminate; destruct (preg_of dst); TailNoLabel.
 Qed.
 
 Remark storeind_label:
@@ -180,11 +176,7 @@ Remark storeind_label:
   storeind src base ofs ty k = OK c ->
   tail_nolabel k c.
 Proof.
-  unfold storeind; intros. destruct ty.
-  TailNoLabel.
-  destruct (preg_of src); TailNoLabel.
-  discriminate.
-  TailNoLabel.
+  unfold storeind; intros. destruct ty; try discriminate; destruct (preg_of src); TailNoLabel.
 Qed.
 
 Remark mk_setcc_base_label:
@@ -220,6 +212,8 @@ Proof.
   destruct (Int.eq_dec i Int.zero); TailNoLabel.
   destruct c0; simpl; TailNoLabel.
   destruct c0; simpl; TailNoLabel.
+  destruct c0; simpl; TailNoLabel.
+  destruct c0; simpl; TailNoLabel.
 Qed.
 
 Remark transl_op_label:
@@ -230,6 +224,7 @@ Proof.
   unfold transl_op; intros. destruct op; TailNoLabel.
   destruct (Int.eq_dec i Int.zero); TailNoLabel.
   destruct (Float.eq_dec f Float.zero); TailNoLabel.
+  destruct (Float32.eq_dec f Float32.zero); TailNoLabel.
   eapply tail_nolabel_trans. eapply transl_cond_label; eauto. eapply mk_setcc_label.  
 Qed.
 
diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v
index b3c815b..7d71d1a 100644
--- a/ia32/Asmgenproof1.v
+++ b/ia32/Asmgenproof1.v
@@ -107,7 +107,7 @@ Lemma mk_mov_correct:
   exists rs2,
      exec_straight ge fn c rs1 m k rs2 m
   /\ rs2#rd = rs1#rs
-  /\ forall r, data_preg r = true -> r <> ST0 -> r <> rd -> rs2#r = rs1#r.
+  /\ forall r, data_preg r = true -> r <> rd -> rs2#r = rs1#r.
 Proof.
   unfold mk_mov; intros. 
   destruct rd; try (monadInv H); destruct rs; monadInv H.
@@ -117,12 +117,6 @@ Proof.
 (* movd *)
   econstructor. split. apply exec_straight_one. simpl. eauto. auto. 
   split. Simplifs. intros; Simplifs.
-(* getfp0 *)
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto. 
-  split. Simplifs. intros; Simplifs.
-(* setfp0 *)
-  econstructor. split. apply exec_straight_one. simpl. eauto. auto. 
-  split. Simplifs. intros; Simplifs.
 Qed.
 
 (** Properties of division *)
@@ -288,27 +282,10 @@ Proof.
   set (addr := Addrmode (Some base) None (inl (ident * int) ofs)) in *.
   assert (eval_addrmode ge addr rs = Val.add rs#base (Vint ofs)).
     unfold addr. simpl. rewrite Int.add_commut; rewrite Int.add_zero; auto. 
-  destruct ty; simpl in H0.
-  (* int *)
-  monadInv H.
-  rewrite (ireg_of_eq _ _ EQ). econstructor.
-  split. apply exec_straight_one. simpl. unfold exec_load. rewrite H1. rewrite H0.
-  eauto. auto.
-  intuition Simplifs.
-  (* float *)
-  exists (nextinstr_nf (rs#(preg_of dst) <- v)).
-  split. destruct (preg_of dst); inv H; apply exec_straight_one; simpl; auto.
-  unfold exec_load. rewrite H1; rewrite H0; auto. 
-  unfold exec_load. rewrite H1; rewrite H0; auto.
-  intuition Simplifs.
-  (* long *)
-  inv H.
-  (* single *)
-  monadInv H.
-  rewrite (freg_of_eq _ _ EQ). econstructor.
-  split. apply exec_straight_one. simpl. unfold exec_load. rewrite H1. rewrite H0.
-  eauto. auto.
-  intuition Simplifs.
+  exists (nextinstr_nf (rs#(preg_of dst) <- v)); split.
+- destruct ty; try discriminate; destruct (preg_of dst); inv H; simpl in H0;
+  apply exec_straight_one; auto; simpl; unfold exec_load; rewrite H1, H0; auto.
+- intuition Simplifs.
 Qed.
 
 Lemma storeind_correct:
@@ -319,33 +296,15 @@ Lemma storeind_correct:
      exec_straight ge fn c rs m k rs' m'
   /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r.
 Proof.
+Local Transparent destroyed_by_setstack.
   unfold storeind; intros.
   set (addr := Addrmode (Some base) None (inl (ident * int) ofs)) in *.
   assert (eval_addrmode ge addr rs = Val.add rs#base (Vint ofs)).
     unfold addr. simpl. rewrite Int.add_commut; rewrite Int.add_zero; auto. 
-  destruct ty; simpl in H0.
-  (* int *)
-  monadInv H.
-  rewrite (ireg_of_eq _ _ EQ) in H0. econstructor.
-  split. apply exec_straight_one. simpl. unfold exec_store. rewrite H1. rewrite H0.
-  eauto. auto. 
-  intros; Simplifs.
-  (* float *)
-  destruct (preg_of src); inv H.
-  econstructor; split. apply exec_straight_one.
-  simpl. unfold exec_store. rewrite H1; rewrite H0. eauto. auto.
-  intros. apply nextinstr_nf_inv1; auto.
-  econstructor; split. apply exec_straight_one.
-  simpl. unfold exec_store. rewrite H1; rewrite H0. eauto. auto.
-  intros. simpl. Simplifs.
-  (* long *)
-  inv H.
-  (* single *)
-  monadInv H.
-  rewrite (freg_of_eq _ _ EQ) in H0. econstructor.
-  split. apply exec_straight_one. simpl. unfold exec_store. rewrite H1. rewrite H0.
-  simpl. eauto. auto.
-  intros. destruct H2. Simplifs.
+  destruct ty; try discriminate; destruct (preg_of src); inv H; simpl in H0;
+  (econstructor; split;
+  [apply exec_straight_one; [simpl; unfold exec_store; rewrite H1, H0; eauto|auto]
+  |simpl; intros; unfold undef_regs; repeat Simplifs]).
 Qed.
 
 (** Translation of addressing modes *)
@@ -546,6 +505,21 @@ Proof.
   intros. Simplifs.
 Qed.
 
+Lemma compare_floats32_spec:
+  forall rs n1 n2,
+  let rs' := nextinstr (compare_floats32 (Vsingle n1) (Vsingle n2) rs) in
+     rs'#ZF = Val.of_bool (negb (Float32.cmp Cne n1 n2))
+  /\ rs'#CF = Val.of_bool (negb (Float32.cmp Cge n1 n2))
+  /\ rs'#PF = Val.of_bool (negb (Float32.cmp Ceq n1 n2 || Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2))
+  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
+Proof.
+  intros. unfold rs'; unfold compare_floats32.
+  split. auto. 
+  split. auto.
+  split. auto.
+  intros. Simplifs.
+Qed.
+
 Definition eval_extcond (xc: extcond) (rs: regset) : option bool :=
   match xc with
   | Cond_base c =>
@@ -664,8 +638,104 @@ Proof.
   destruct (Float.cmp Cge n1 n2); auto.
 Qed.
 
+Lemma testcond_for_float32_comparison_correct:
+  forall c n1 n2 rs,
+  eval_extcond (testcond_for_condition (Ccompfs c))
+               (nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
+                                            (Vsingle (swap_floats c n2 n1)) rs)) =
+  Some(Float32.cmp c n1 n2).
+Proof.
+  intros.
+  generalize (compare_floats32_spec rs (swap_floats c n1 n2) (swap_floats c n2 n1)).
+  set (rs' := nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
+                                        (Vsingle (swap_floats c n2 n1)) rs)).
+  intros [A [B [C D]]].
+  unfold eval_extcond, eval_testcond. rewrite A; rewrite B; rewrite C.
+  destruct c; simpl.
+(* eq *)
+  rewrite Float32.cmp_ne_eq.
+  caseEq (Float32.cmp Ceq n1 n2); intros.
+  auto.
+  simpl. destruct (Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2); auto.
+(* ne *)
+  rewrite Float32.cmp_ne_eq.
+  caseEq (Float32.cmp Ceq n1 n2); intros.
+  auto.
+  simpl. destruct (Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2); auto.
+(* lt *)
+  rewrite <- (Float32.cmp_swap Cge n1 n2).
+  rewrite <- (Float32.cmp_swap Cne n1 n2).
+  simpl.
+  rewrite Float32.cmp_ne_eq. rewrite Float32.cmp_le_lt_eq.
+  caseEq (Float32.cmp Clt n1 n2); intros; simpl.
+  caseEq (Float32.cmp Ceq n1 n2); intros; simpl. 
+  elimtype False. eapply Float32.cmp_lt_eq_false; eauto. 
+  auto. 
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* le *)
+  rewrite <- (Float32.cmp_swap Cge n1 n2). simpl.
+  destruct (Float32.cmp Cle n1 n2); auto.
+(* gt *)
+  rewrite Float32.cmp_ne_eq. rewrite Float32.cmp_ge_gt_eq. 
+  caseEq (Float32.cmp Cgt n1 n2); intros; simpl.
+  caseEq (Float32.cmp Ceq n1 n2); intros; simpl. 
+  elimtype False. eapply Float32.cmp_gt_eq_false; eauto. 
+  auto. 
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* ge *)
+  destruct (Float32.cmp Cge n1 n2); auto.
+Qed.
+
+Lemma testcond_for_neg_float32_comparison_correct:
+  forall c n1 n2 rs,
+  eval_extcond (testcond_for_condition (Cnotcompfs c))
+               (nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
+                                            (Vsingle (swap_floats c n2 n1)) rs)) =
+  Some(negb(Float32.cmp c n1 n2)).
+Proof.
+  intros.
+  generalize (compare_floats32_spec rs (swap_floats c n1 n2) (swap_floats c n2 n1)).
+  set (rs' := nextinstr (compare_floats32 (Vsingle (swap_floats c n1 n2))
+                                          (Vsingle (swap_floats c n2 n1)) rs)).
+  intros [A [B [C D]]].
+  unfold eval_extcond, eval_testcond. rewrite A; rewrite B; rewrite C.
+  destruct c; simpl.
+(* eq *)
+  rewrite Float32.cmp_ne_eq.
+  caseEq (Float32.cmp Ceq n1 n2); intros.
+  auto.
+  simpl. destruct (Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2); auto.
+(* ne *)
+  rewrite Float32.cmp_ne_eq.
+  caseEq (Float32.cmp Ceq n1 n2); intros.
+  auto.
+  simpl. destruct (Float32.cmp Clt n1 n2 || Float32.cmp Cgt n1 n2); auto.
+(* lt *)
+  rewrite <- (Float32.cmp_swap Cge n1 n2).
+  rewrite <- (Float32.cmp_swap Cne n1 n2).
+  simpl.
+  rewrite Float32.cmp_ne_eq. rewrite Float32.cmp_le_lt_eq.
+  caseEq (Float32.cmp Clt n1 n2); intros; simpl.
+  caseEq (Float32.cmp Ceq n1 n2); intros; simpl. 
+  elimtype False. eapply Float32.cmp_lt_eq_false; eauto.
+  auto. 
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* le *)
+  rewrite <- (Float32.cmp_swap Cge n1 n2). simpl.
+  destruct (Float32.cmp Cle n1 n2); auto.
+(* gt *)
+  rewrite Float32.cmp_ne_eq. rewrite Float32.cmp_ge_gt_eq. 
+  caseEq (Float32.cmp Cgt n1 n2); intros; simpl.
+  caseEq (Float32.cmp Ceq n1 n2); intros; simpl. 
+  elimtype False. eapply Float32.cmp_gt_eq_false; eauto. 
+  auto. 
+  destruct (Float32.cmp Ceq n1 n2); auto.
+(* ge *)
+  destruct (Float32.cmp Cge n1 n2); auto.
+Qed.
+
 Remark swap_floats_commut:
-  forall c x y, swap_floats c (Vfloat x) (Vfloat y) = Vfloat (swap_floats c x y).
+  forall (A B: Type) c (f: A -> B) x y, swap_floats c (f x) (f y) = f (swap_floats c x y).
 Proof.
   intros. destruct c; auto. 
 Qed.
@@ -679,7 +749,18 @@ Proof.
   assert (DFL: undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs r = rs r).
     simpl. Simplifs. 
   unfold compare_floats; destruct vx; destruct vy; auto. Simplifs. 
-Qed.  
+Qed.
+
+Remark compare_floats32_inv:
+  forall vx vy rs r,
+  r <> CR ZF -> r <> CR CF -> r <> CR PF -> r <> CR SF -> r <> CR OF ->
+  compare_floats32 vx vy rs r = rs r.
+Proof.
+  intros. 
+  assert (DFL: undef_regs (CR ZF :: CR CF :: CR PF :: CR SF :: CR OF :: nil) rs r = rs r).
+    simpl. Simplifs. 
+  unfold compare_floats32; destruct vx; destruct vy; auto. Simplifs. 
+Qed.
 
 Lemma transl_cond_correct:
   forall cond args k c rs m,
@@ -740,6 +821,24 @@ Proof.
   split. destruct (rs x); destruct (rs x0); simpl; auto.
   repeat rewrite swap_floats_commut. apply testcond_for_neg_float_comparison_correct.
   intros. Simplifs. apply compare_floats_inv; auto with asmgen. 
+(* compfs *)
+  simpl. rewrite (freg_of_eq _ _ EQ). rewrite (freg_of_eq _ _ EQ1).
+  exists (nextinstr (compare_floats32 (swap_floats c0 (rs x) (rs x0)) (swap_floats c0 (rs x0) (rs x)) rs)).
+  split. apply exec_straight_one. 
+  destruct c0; simpl; auto.
+  unfold nextinstr. rewrite Pregmap.gss. rewrite compare_floats32_inv; auto with asmgen. 
+  split. destruct (rs x); destruct (rs x0); simpl; auto.
+  repeat rewrite swap_floats_commut. apply testcond_for_float32_comparison_correct.
+  intros. Simplifs. apply compare_floats32_inv; auto with asmgen. 
+(* notcompfs *)
+  simpl. rewrite (freg_of_eq _ _ EQ). rewrite (freg_of_eq _ _ EQ1).
+  exists (nextinstr (compare_floats32 (swap_floats c0 (rs x) (rs x0)) (swap_floats c0 (rs x0) (rs x)) rs)).
+  split. apply exec_straight_one. 
+  destruct c0; simpl; auto.
+  unfold nextinstr. rewrite Pregmap.gss. rewrite compare_floats32_inv; auto with asmgen. 
+  split. destruct (rs x); destruct (rs x0); simpl; auto.
+  repeat rewrite swap_floats_commut. apply testcond_for_neg_float32_comparison_correct.
+  intros. Simplifs. apply compare_floats32_inv; auto with asmgen. 
 (* maskzero *)
   simpl. rewrite (ireg_of_eq _ _ EQ).
   econstructor. split. apply exec_straight_one. simpl; eauto. auto.
@@ -909,11 +1008,13 @@ Transparent destroyed_by_op.
   destruct op; simpl in TR; ArgsInv; simpl in EV; try (inv EV); try (apply SAME; TranslOp; fail).
 (* move *)
   exploit mk_mov_correct; eauto. intros [rs2 [A [B C]]]. 
-  apply SAME. exists rs2. split. eauto. split. simpl. auto. intros. destruct H; auto.
+  apply SAME. exists rs2. eauto. 
 (* intconst *)
   apply SAME. destruct (Int.eq_dec i Int.zero). subst i. TranslOp. TranslOp.
 (* floatconst *)
   apply SAME. destruct (Float.eq_dec f Float.zero). subst f. TranslOp. TranslOp.
+(* singleconst *)
+  apply SAME. destruct (Float32.eq_dec f Float32.zero). subst f. TranslOp. TranslOp.
 (* cast8signed *)
   apply SAME. eapply mk_intconv_correct; eauto.
 (* cast8unsigned *)
@@ -963,6 +1064,10 @@ Transparent destroyed_by_op.
   apply SAME. TranslOp. rewrite H0; auto.
 (* floatofint *)
   apply SAME. TranslOp. rewrite H0; auto.
+(* intofsingle *)
+  apply SAME. TranslOp. rewrite H0; auto.
+(* singleofint *)
+  apply SAME. TranslOp. rewrite H0; auto.
 (* condition *)
   exploit transl_cond_correct; eauto. intros [rs2 [P [Q R]]].
   exploit mk_setcc_correct; eauto. intros [rs3 [S [T U]]].
diff --git a/ia32/ConstpropOp.vp b/ia32/ConstpropOp.vp
index b27f405..8c7f01f 100644
--- a/ia32/ConstpropOp.vp
+++ b/ia32/ConstpropOp.vp
@@ -174,10 +174,15 @@ Definition make_moduimm n (r1 r2: reg) :=
   end.
 
 Definition make_mulfimm (n: float) (r r1 r2: reg) :=
-  if Float.eq_dec n (Float.floatofint (Int.repr 2))
+  if Float.eq_dec n (Float.of_int (Int.repr 2))
   then (Oaddf, r :: r :: nil)
   else (Omulf, r1 :: r2 :: nil).
 
+Definition make_mulfsimm (n: float32) (r r1 r2: reg) :=
+  if Float32.eq_dec n (Float32.of_int (Int.repr 2))
+  then (Oaddfs, r :: r :: nil)
+  else (Omulfs, r1 :: r2 :: nil).
+
 Definition make_cast8signed (r: reg) (a: aval) :=
   if vincl a (Sgn 8) then (Omove, r :: nil) else (Ocast8signed, r :: nil).
 Definition make_cast8unsigned (r: reg) (a: aval) :=
@@ -186,10 +191,6 @@ Definition make_cast16signed (r: reg) (a: aval) :=
   if vincl a (Sgn 16) then (Omove, r :: nil) else (Ocast16signed, r :: nil).
 Definition make_cast16unsigned (r: reg) (a: aval) :=
   if vincl a (Uns 16) then (Omove, r :: nil) else (Ocast16unsigned, r :: nil).
-Definition make_singleoffloat (r: reg) (a: aval) :=
-  if vincl a Fsingle && generate_float_constants tt
-  then (Omove, r :: nil)
-  else (Osingleoffloat, r :: nil).
 
 Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list aval) :=
@@ -217,9 +218,10 @@ Nondetfunction op_strength_reduction
   | Olea addr, args, vl =>
       let (addr', args') := addr_strength_reduction addr args vl in
       (Olea addr', args')
-  | Osingleoffloat, r1 :: nil, v1 :: nil => make_singleoffloat r1 v1
   | Ocmp c, args, vl => make_cmp c args vl
   | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
   | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
+  | Omulfs, r1 :: r2 :: nil, v1 :: FS n2 :: nil => make_mulfsimm n2 r1 r1 r2
+  | Omulfs, r1 :: r2 :: nil, FS n1 :: v2 :: nil => make_mulfsimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
diff --git a/ia32/ConstpropOpproof.v b/ia32/ConstpropOpproof.v
index d9eea2b..6adb26f 100644
--- a/ia32/ConstpropOpproof.v
+++ b/ia32/ConstpropOpproof.v
@@ -77,6 +77,10 @@ Ltac SimplVM :=
       let E := fresh in
       assert (E: v = Vfloat n) by (inversion H; auto);
       rewrite E in *; clear H; SimplVM
+  | [ H: vmatch _ ?v (FS ?n) |- _ ] =>
+      let E := fresh in
+      assert (E: v = Vsingle n) by (inversion H; auto);
+      rewrite E in *; clear H; SimplVM
   | [ H: vmatch _ ?v (Ptr(Gl ?id ?ofs)) |- _ ] =>
       let E := fresh in
       assert (E: Val.lessdef v (Genv.symbol_address ge id ofs)) by (eapply vmatch_ptr_gl; eauto); 
@@ -368,7 +372,7 @@ Lemma make_mulfimm_correct:
   exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (e#r1); simpl; auto. rewrite Float.mul2_add; auto. 
   simpl. econstructor; split; eauto. 
@@ -381,13 +385,40 @@ Lemma make_mulfimm_correct_2:
   exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.mulf e#r1 e#r2) v.
 Proof.
   intros; unfold make_mulfimm. 
-  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  destruct (Float.eq_dec n (Float.of_int (Int.repr 2))); intros. 
   simpl. econstructor; split. eauto. rewrite H; subst n.
   destruct (e#r2); simpl; auto. rewrite Float.mul2_add; auto. 
   rewrite Float.mul_commut; auto. 
   simpl. econstructor; split; eauto. 
 Qed.
 
+Lemma make_mulfsimm_correct:
+  forall n r1 r2,
+  e#r2 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r1 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r1); simpl; auto. rewrite Float32.mul2_add; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
+Lemma make_mulfsimm_correct_2:
+  forall n r1 r2,
+  e#r1 = Vsingle n ->
+  let (op, args) := make_mulfsimm n r2 r1 r2 in
+  exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.mulfs e#r1 e#r2) v.
+Proof.
+  intros; unfold make_mulfsimm. 
+  destruct (Float32.eq_dec n (Float32.of_int (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (e#r2); simpl; auto. rewrite Float32.mul2_add; auto. 
+  rewrite Float32.mul_commut; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
 Lemma make_cast8signed_correct:
   forall r x,
   vmatch bc e#r x ->
@@ -444,21 +475,6 @@ Proof.
   econstructor; split; simpl; eauto.
 Qed.
 
-Lemma make_singleoffloat_correct:
-  forall r x,
-  vmatch bc e#r x ->
-  let (op, args) := make_singleoffloat r x in
-  exists v, eval_operation ge (Vptr sp Int.zero) op e##args m = Some v /\ Val.lessdef (Val.singleoffloat e#r) v.
-Proof.
-  intros; unfold make_singleoffloat.
-  destruct (vincl x Fsingle && generate_float_constants tt) eqn:INCL. 
-  InvBooleans. exists e#r; split; auto. 
-  assert (V: vmatch bc e#r Fsingle).
-  { eapply vmatch_ge; eauto. apply vincl_ge; auto. }
-  inv V; simpl; auto. rewrite Float.singleoffloat_of_single by auto. auto.
-  econstructor; split; simpl; eauto.
-Qed.
-
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = map (fun r => AE.get r ae) args ->
@@ -510,14 +526,16 @@ Proof.
   exploit addr_strength_reduction_correct; eauto. 
   destruct (addr_strength_reduction addr args0 vl0) as [addr' args'].
   auto.
-(* singleoffloat *)
-  InvApproxRegs; SimplVM; inv H0. apply make_singleoffloat_correct; auto.
 (* cond *)
   inv H0. apply make_cmp_correct; auto.
 (* mulf *)
   InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfimm_correct; auto.
   InvApproxRegs; SimplVM; inv H0. fold (Val.mulf (Vfloat n1) e#r2).
   rewrite <- H2. apply make_mulfimm_correct_2; auto.
+(* mulfs *)
+  InvApproxRegs; SimplVM; inv H0. rewrite <- H2. apply make_mulfsimm_correct; auto.
+  InvApproxRegs; SimplVM; inv H0. fold (Val.mulfs (Vsingle n1) e#r2).
+  rewrite <- H2. apply make_mulfsimm_correct_2; auto.
 (* default *)
   exists v; auto.
 Qed.
diff --git a/ia32/Machregs.v b/ia32/Machregs.v
index da80a6e..a9f2b6c 100644
--- a/ia32/Machregs.v
+++ b/ia32/Machregs.v
@@ -43,11 +43,8 @@ Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
-  | AX => Tint | BX => Tint | CX => Tint | DX => Tint
-  | SI => Tint | DI => Tint | BP => Tint
-  | X0 => Tfloat | X1 => Tfloat | X2 => Tfloat | X3 => Tfloat
-  | X4 => Tfloat | X5 => Tfloat | X6 => Tfloat | X7 => Tfloat
-  | FP0 => Tfloat
+  | AX | BX | CX | DX | SI | DI | BP => Tany32
+  | X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7 | FP0 => Tany64
   end.
 
 Local Open Scope positive_scope.
@@ -76,7 +73,6 @@ Definition is_stack_reg (r: mreg) : bool :=
 
 Definition destroyed_by_op (op: operation): list mreg :=
   match op with
-  | Omove => FP0 :: nil
   | Ocast8signed | Ocast8unsigned | Ocast16signed | Ocast16unsigned => AX :: nil
   | Omulhs => AX :: DX :: nil
   | Omulhu => AX :: DX :: nil
@@ -95,9 +91,7 @@ Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg
 Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
   match chunk with
   | Mint8signed | Mint8unsigned => AX :: CX :: nil
-  | Mint16signed | Mint16unsigned | Mint32 | Mint64 => nil
-  | Mfloat32 => X7 :: nil
-  | Mfloat64 => FP0 :: nil
+  | _ => nil
   end.
 
 Definition destroyed_by_cond (cond: condition): list mreg :=
@@ -116,9 +110,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   | EF_memcpy sz al =>
       if zle sz 32 then CX :: X7 :: nil else CX :: SI :: DI :: nil
   | EF_vstore (Mint8unsigned|Mint8signed) => AX :: CX :: nil
-  | EF_vstore Mfloat32 => X7 :: nil
   | EF_vstore_global (Mint8unsigned|Mint8signed) _ _ => AX :: nil
-  | EF_vstore_global Mfloat32 _ _ => X7 :: nil
   | EF_builtin id sg =>
       if ident_eq id builtin_write16_reversed
       || ident_eq id builtin_write32_reversed
@@ -127,12 +119,12 @@ Definition destroyed_by_builtin (ef: external_function): list mreg :=
   end.
 
 Definition destroyed_at_function_entry: list mreg :=
-  DX :: FP0 :: nil.   (* must include destroyed_by_op Omove *)
+  (* must include [destroyed_by_setstack ty] *)
+  DX :: FP0 :: nil.
 
 Definition destroyed_by_setstack (ty: typ): list mreg :=
   match ty with
-  | Tfloat => FP0 :: nil
-  | Tsingle => X7 :: FP0 :: nil
+  | Tfloat | Tsingle => FP0 :: nil
   | _ => nil
   end.
 
@@ -190,6 +182,7 @@ Definition two_address_op (op: operation) : bool :=
   | Omove => false
   | Ointconst _ => false
   | Ofloatconst _ => false
+  | Osingleconst _ => false
   | Oindirectsymbol _ => false
   | Ocast8signed => false
   | Ocast8unsigned => false
@@ -228,9 +221,18 @@ Definition two_address_op (op: operation) : bool :=
   | Osubf => true
   | Omulf => true
   | Odivf => true
+  | Onegfs => true
+  | Oabsfs => true
+  | Oaddfs => true
+  | Osubfs => true
+  | Omulfs => true
+  | Odivfs => true
   | Osingleoffloat => false
+  | Ofloatofsingle => false
   | Ointoffloat => false
   | Ofloatofint => false
+  | Ointofsingle => false
+  | Osingleofint => false
   | Omakelong => false
   | Olowlong => false
   | Ohighlong => false
diff --git a/ia32/NeedOp.v b/ia32/NeedOp.v
index 2c8698f..52b9fcb 100644
--- a/ia32/NeedOp.v
+++ b/ia32/NeedOp.v
@@ -37,6 +37,7 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Omove => op1 nv
   | Ointconst n => nil
   | Ofloatconst n => nil
+  | Osingleconst n => nil
   | Oindirectsymbol id => nil
   | Ocast8signed => op1 (sign_ext 8 nv)
   | Ocast8unsigned => op1 (zero_ext 8 nv)
@@ -66,8 +67,10 @@ Definition needs_of_operation (op: operation) (nv: nval): list nval :=
   | Olea addr => needs_of_addressing addr nv
   | Onegf | Oabsf => op1 (default nv)
   | Oaddf | Osubf | Omulf | Odivf => op2 (default nv)
-  | Osingleoffloat => op1 (singleoffloat nv)
-  | Ointoffloat | Ofloatofint => op1 (default nv)
+  | Onegfs | Oabsfs => op1 (default nv)
+  | Oaddfs | Osubfs | Omulfs | Odivfs => op2 (default nv)
+  | Osingleoffloat | Ofloatofsingle => op1 (default nv)
+  | Ointoffloat | Ofloatofint | Ointofsingle | Osingleofint => op1 (default nv)
   | Omakelong => op2 (default nv)
   | Olowlong | Ohighlong => op1 (default nv)
   | Ocmp c => needs_of_condition c
@@ -81,7 +84,6 @@ Definition operation_is_redundant (op: operation) (nv: nval): bool :=
   | Ocast16unsigned => zero_ext_redundant 16 nv
   | Oandimm n => andimm_redundant nv n
   | Oorimm n => orimm_redundant nv n
-  | Osingleoffloat => singleoffloat_redundant nv
   | _ => false
   end.
 
@@ -165,7 +167,6 @@ Proof.
 - apply shruimm_sound; auto. 
 - apply ror_sound; auto. 
 - eapply needs_of_addressing_sound; eauto.
-- apply singleoffloat_sound; auto. 
 - destruct (eval_condition c args m) as [b|] eqn:EC; simpl in H2. 
   erewrite needs_of_condition_sound by eauto.
   subst v; simpl. auto with na.
@@ -186,7 +187,6 @@ Proof.
 - apply zero_ext_redundant_sound; auto. omega.
 - apply andimm_redundant_sound; auto. 
 - apply orimm_redundant_sound; auto.
-- apply singleoffloat_redundant_sound; auto.
 Qed.
 
 End SOUNDNESS.
diff --git a/ia32/Op.v b/ia32/Op.v
index e46c740..14e4cbb 100644
--- a/ia32/Op.v
+++ b/ia32/Op.v
@@ -42,8 +42,10 @@ Inductive condition : Type :=
   | Ccompu: comparison -> condition     (**r unsigned integer comparison *)
   | Ccompimm: comparison -> int -> condition (**r signed integer comparison with a constant *)
   | Ccompuimm: comparison -> int -> condition  (**r unsigned integer comparison with a constant *)
-  | Ccompf: comparison -> condition     (**r floating-point comparison *)
+  | Ccompf: comparison -> condition     (**r 64-bit floating-point comparison *)
   | Cnotcompf: comparison -> condition  (**r negation of a floating-point comparison *)
+  | Ccompfs: comparison -> condition    (**r 32-bit floating-point comparison *)
+  | Cnotcompfs: comparison -> condition (**r negation of a floating-point comparison *)
   | Cmaskzero: int -> condition         (**r test [(arg & constant) == 0] *)
   | Cmasknotzero: int -> condition.     (**r test [(arg & constant) != 0] *)
 
@@ -68,6 +70,7 @@ Inductive operation : Type :=
   | Omove: operation                    (**r [rd = r1] *)
   | Ointconst: int -> operation         (**r [rd] is set to the given integer constant *)
   | Ofloatconst: float -> operation     (**r [rd] is set to the given float constant *)
+  | Osingleconst: float32 -> operation  (**r [rd] is set to the given float constant *)
   | Oindirectsymbol: ident -> operation (**r [rd] is set to the address of the symbol *)
 (*c Integer arithmetic: *)
   | Ocast8signed: operation             (**r [rd] is 8-bit sign extension of [r1] *)
@@ -108,10 +111,19 @@ Inductive operation : Type :=
   | Osubf: operation                    (**r [rd = r1 - r2] *)
   | Omulf: operation                    (**r [rd = r1 * r2] *)
   | Odivf: operation                    (**r [rd = r1 / r2] *)
+  | Onegfs: operation                   (**r [rd = - r1] *)
+  | Oabsfs: operation                   (**r [rd = abs(r1)] *)
+  | Oaddfs: operation                   (**r [rd = r1 + r2] *)
+  | Osubfs: operation                   (**r [rd = r1 - r2] *)
+  | Omulfs: operation                   (**r [rd = r1 * r2] *)
+  | Odivfs: operation                   (**r [rd = r1 / r2] *)
   | Osingleoffloat: operation           (**r [rd] is [r1] truncated to single-precision float *)
+  | Ofloatofsingle: operation           (**r [rd] is [r1] extended to double-precision float *)
 (*c Conversions between int and float: *)
-  | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
-  | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] *)
+  | Ointoffloat: operation              (**r [rd = signed_int_of_float64(r1)] *)
+  | Ofloatofint: operation              (**r [rd = float64_of_signed_int(r1)] *)
+  | Ointofsingle: operation             (**r [rd = signed_int_of_float32(r1)] *)
+  | Osingleofint: operation             (**r [rd = float32_of_signed_int(r1)] *)
 (*c Manipulating 64-bit integers: *)
   | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
   | Olowlong: operation                 (**r [rd = low-word(r1)] *)
@@ -145,6 +157,7 @@ Definition eq_operation (x y: operation): {x=y} + {x<>y}.
 Proof.
   generalize Int.eq_dec; intro.
   generalize Float.eq_dec; intro.
+  generalize Float32.eq_dec; intro.
   generalize Int64.eq_dec; intro.
   decide equality.
   apply peq.
@@ -169,6 +182,8 @@ Definition eval_condition (cond: condition) (vl: list val) (m: mem): option bool
   | Ccompuimm c n, v1 :: nil => Val.cmpu_bool (Mem.valid_pointer m) c v1 (Vint n)
   | Ccompf c, v1 :: v2 :: nil => Val.cmpf_bool c v1 v2
   | Cnotcompf c, v1 :: v2 :: nil => option_map negb (Val.cmpf_bool c v1 v2)
+  | Ccompfs c, v1 :: v2 :: nil => Val.cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => option_map negb (Val.cmpfs_bool c v1 v2)
   | Cmaskzero n, v1 :: nil => Val.maskzero_bool v1 n
   | Cmasknotzero n, v1 :: nil => option_map negb (Val.maskzero_bool v1 n)
   | _, _ => None
@@ -204,6 +219,7 @@ Definition eval_operation
   | Omove, v1::nil => Some v1
   | Ointconst n, nil => Some (Vint n)
   | Ofloatconst n, nil => Some (Vfloat n)
+  | Osingleconst n, nil => Some (Vsingle n)
   | Oindirectsymbol id, nil => Some (Genv.symbol_address genv id Int.zero)
   | Ocast8signed, v1 :: nil => Some (Val.sign_ext 8 v1)
   | Ocast8unsigned, v1 :: nil => Some (Val.zero_ext 8 v1)
@@ -243,9 +259,18 @@ Definition eval_operation
   | Osubf, v1::v2::nil => Some(Val.subf v1 v2)
   | Omulf, v1::v2::nil => Some(Val.mulf v1 v2)
   | Odivf, v1::v2::nil => Some(Val.divf v1 v2)
+  | Onegfs, v1::nil => Some(Val.negfs v1)
+  | Oabsfs, v1::nil => Some(Val.absfs v1)
+  | Oaddfs, v1::v2::nil => Some(Val.addfs v1 v2)
+  | Osubfs, v1::v2::nil => Some(Val.subfs v1 v2)
+  | Omulfs, v1::v2::nil => Some(Val.mulfs v1 v2)
+  | Odivfs, v1::v2::nil => Some(Val.divfs v1 v2)
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
+  | Ofloatofsingle, v1::nil => Some(Val.floatofsingle v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
   | Ofloatofint, v1::nil => Val.floatofint v1
+  | Ointofsingle, v1::nil => Val.intofsingle v1
+  | Osingleofint, v1::nil => Val.singleofint v1
   | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
   | Olowlong, v1::nil => Some(Val.loword v1)
   | Ohighlong, v1::nil => Some(Val.hiword v1)
@@ -275,6 +300,8 @@ Definition type_of_condition (c: condition) : list typ :=
   | Ccompuimm _ _ => Tint :: nil
   | Ccompf _ => Tfloat :: Tfloat :: nil
   | Cnotcompf _ => Tfloat :: Tfloat :: nil
+  | Ccompfs _ => Tsingle :: Tsingle :: nil
+  | Cnotcompfs _ => Tsingle :: Tsingle :: nil
   | Cmaskzero _ => Tint :: nil
   | Cmasknotzero _ => Tint :: nil
   end.
@@ -295,7 +322,8 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   match op with
   | Omove => (nil, Tint)   (* treated specially *)
   | Ointconst _ => (nil, Tint)
-  | Ofloatconst f => (nil, if Float.is_single_dec f then Tsingle else Tfloat)
+  | Ofloatconst f => (nil, Tfloat)
+  | Osingleconst f => (nil, Tsingle)
   | Oindirectsymbol _ => (nil, Tint)
   | Ocast8signed => (Tint :: nil, Tint)
   | Ocast8unsigned => (Tint :: nil, Tint)
@@ -334,9 +362,18 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osubf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Omulf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
+  | Onegfs => (Tsingle :: nil, Tsingle)
+  | Oabsfs => (Tsingle :: nil, Tsingle)
+  | Oaddfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Osubfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Omulfs => (Tsingle :: Tsingle :: nil, Tsingle)
+  | Odivfs => (Tsingle :: Tsingle :: nil, Tsingle)
   | Osingleoffloat => (Tfloat :: nil, Tsingle)
+  | Ofloatofsingle => (Tsingle :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
+  | Ointofsingle => (Tsingle :: nil, Tint)
+  | Osingleofint => (Tint :: nil, Tsingle)
   | Omakelong => (Tint :: Tint :: nil, Tlong)
   | Olowlong => (Tlong :: nil, Tint)
   | Ohighlong => (Tlong :: nil, Tint)
@@ -380,7 +417,8 @@ Proof with (try exact I).
   destruct op; simpl in H0; FuncInv; subst; simpl.
   congruence.
   exact I.
-  destruct (Float.is_single_dec f); auto. 
+  exact I.
+  exact I.
   unfold Genv.symbol_address; destruct (Genv.find_symbol genv i)...
   destruct v0...
   destruct v0...
@@ -422,8 +460,17 @@ Proof with (try exact I).
   destruct v0; destruct v1...
   destruct v0; destruct v1...
   destruct v0; destruct v1...
-  destruct v0... apply Float.singleoffloat_is_single.
-  destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
+  destruct v0...
+  destruct v0...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
+  destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2...
+  destruct v0; simpl in H0; inv H0...
+  destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2...
   destruct v0; simpl in H0; inv H0...
   destruct v0; destruct v1...
   destruct v0...
@@ -467,6 +514,8 @@ Definition negate_condition (cond: condition): condition :=
   | Ccompuimm c n => Ccompuimm (negate_comparison c) n
   | Ccompf c => Cnotcompf c
   | Cnotcompf c => Ccompf c
+  | Ccompfs c => Cnotcompfs c
+  | Cnotcompfs c => Ccompfs c
   | Cmaskzero n => Cmasknotzero n
   | Cmasknotzero n => Cmaskzero n
   end.
@@ -482,6 +531,8 @@ Proof.
   repeat (destruct vl; auto). apply Val.negate_cmpu_bool.
   repeat (destruct vl; auto). 
   repeat (destruct vl; auto). destruct (Val.cmpf_bool c v v0) as [[]|]; auto.
+  repeat (destruct vl; auto). 
+  repeat (destruct vl; auto). destruct (Val.cmpfs_bool c v v0) as [[]|]; auto.
   destruct vl; auto. destruct vl; auto.
   destruct vl; auto. destruct vl; auto. destruct (Val.maskzero_bool v i) as [[]|]; auto.
 Qed.
@@ -608,61 +659,6 @@ Proof.
   destruct c; simpl; try congruence. reflexivity. 
 Qed.
 
-(** Checking whether two addressings, applied to the same arguments, produce
-  separated memory addresses.  Used in [CSE]. *)
-
-Definition addressing_separated (chunk1: memory_chunk) (addr1: addressing)
-                               (chunk2: memory_chunk) (addr2: addressing) : bool :=
-  match addr1, addr2 with
-  | Aindexed ofs1, Aindexed ofs2 => 
-      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
-  | Aglobal s1 ofs1, Aglobal s2 ofs2 => 
-      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
-  | Abased s1 ofs1, Abased s2 ofs2 => 
-      if ident_eq s1 s2 then Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2) else true
-  | Ainstack ofs1, Ainstack ofs2 =>
-      Int.no_overlap ofs1 (size_chunk chunk1) ofs2 (size_chunk chunk2)
-  | _, _ => false
-  end.
-
-Lemma addressing_separated_sound:
-  forall (F V: Type) (ge: Genv.t F V) sp chunk1 addr1 chunk2 addr2 vl b1 n1 b2 n2,
-  addressing_separated chunk1 addr1 chunk2 addr2 = true ->
-  eval_addressing ge sp addr1 vl = Some(Vptr b1 n1) ->
-  eval_addressing ge sp addr2 vl = Some(Vptr b2 n2) ->
-  b1 <> b2 \/ Int.unsigned n1 + size_chunk chunk1 <= Int.unsigned n2 \/ Int.unsigned n2 + size_chunk chunk2 <= Int.unsigned n1.
-Proof.
-  unfold addressing_separated; intros.
-  generalize (size_chunk_pos chunk1) (size_chunk_pos chunk2); intros SZ1 SZ2.
-  destruct addr1; destruct addr2; try discriminate; simpl in *; FuncInv.
-(* Aindexed *)
-  destruct v; simpl in *; inv H1; inv H2.
-  right. apply Int.no_overlap_sound; auto. 
-(* Aglobal *)
-  unfold Genv.symbol_address in *. 
-  destruct (Genv.find_symbol ge i1) eqn:?; inv H2.
-  destruct (Genv.find_symbol ge i) eqn:?; inv H1.
-  destruct (ident_eq i i1). subst.
-  replace (Int.unsigned n1) with (Int.unsigned (Int.add Int.zero n1)).
-  replace (Int.unsigned n2) with (Int.unsigned (Int.add Int.zero n2)).
-  right. apply Int.no_overlap_sound; auto. 
-  rewrite Int.add_commut; rewrite Int.add_zero; auto.
-  rewrite Int.add_commut; rewrite Int.add_zero; auto.
-  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
-(* Abased *)
-  unfold Genv.symbol_address in *. 
-  destruct (Genv.find_symbol ge i1) eqn:?; simpl in *; try discriminate.
-  destruct v; inv H2.
-  destruct (Genv.find_symbol ge i) eqn:?; inv H1.
-  destruct (ident_eq i i1). subst.
-  rewrite (Int.add_commut i0 i3). rewrite (Int.add_commut i2 i3).
-  right. apply Int.no_overlap_sound; auto. 
-  left. red; intros; elim n. subst. eapply Genv.genv_vars_inj; eauto.
-(* Ainstack *)
-  destruct sp; simpl in *; inv H1; inv H2.
-  right. apply Int.no_overlap_sound; auto. 
-Qed.
-
 (** * Invariance and compatibility properties. *)
 
 (** [eval_operation] and [eval_addressing] depend on a global environment
@@ -770,6 +766,8 @@ Proof.
   eauto 3 using val_cmpu_bool_inject, Mem.valid_pointer_implies.
   inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; try discriminate; auto.
   inv H3; try discriminate; auto.
 Qed.
@@ -853,7 +851,17 @@ Proof.
   inv H4; inv H2; simpl; auto.
   inv H4; inv H2; simpl; auto.
   inv H4; simpl; auto.
-  inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
+  inv H4; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2.
+  exists (Vint i); auto.
+  inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2.
   exists (Vint i); auto.
   inv H4; simpl in H1; inv H1. simpl. TrivialExists.
   inv H4; inv H2; simpl; auto.
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 33e19f7..67fb80c 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -353,7 +353,7 @@ let print_builtin_vload_common oc chunk addr res =
         fprintf oc "	movl	%a, %a\n" addressing addr ireg res2
       end
   | Mfloat32, [FR res] ->
-      fprintf oc "	cvtss2sd %a, %a\n" addressing addr freg res
+      fprintf oc "	movss	%a, %a\n" addressing addr freg res
   | Mfloat64, [FR res] ->
       fprintf oc "	movsd	%a, %a\n" addressing addr freg res
   | _ ->
@@ -394,8 +394,7 @@ let print_builtin_vstore_common oc chunk addr src tmp =
       fprintf oc "	movl	%a, %a\n" ireg src2 addressing addr;
       fprintf oc "	movl	%a, %a\n" ireg src1 addressing addr'
   | Mfloat32, [FR src] ->
-      fprintf oc "	cvtsd2ss %a, %%xmm7\n" freg src;
-      fprintf oc "	movss	%%xmm7, %a\n" addressing addr
+      fprintf oc "	movss	%a, %a\n" freg src addressing addr
   | Mfloat64, [FR src] ->
       fprintf oc "	movsd	%a, %a\n" freg src addressing addr
   | _ ->
@@ -541,7 +540,8 @@ let print_builtin_inline oc name args res =
 
 (* Printing of instructions *)
 
-let float_literals : (int * int64) list ref = ref []
+let float64_literals : (int * int64) list ref = ref []
+let float32_literals : (int * int32) list ref = ref []
 let jumptables : (int * label list) list ref = ref []
 let indirect_symbols : StringSet.t ref = ref StringSet.empty
 
@@ -560,39 +560,38 @@ let print_instruction oc = function
         fprintf oc "	movl	L%a$non_lazy_ptr, %a\n" raw_symbol id ireg rd
       end else
         fprintf oc "	movl	$%a, %a\n" symbol id ireg rd
-  | Pmov_rm(rd, a) ->
+  | Pmov_rm(rd, a) | Pmov_rm_a(rd, a) ->
       fprintf oc "	movl	%a, %a\n" addressing a ireg rd
-  | Pmov_mr(a, r1) ->
+  | Pmov_mr(a, r1) | Pmov_mr_a(a, r1) ->
       fprintf oc "	movl	%a, %a\n" ireg r1 addressing a
   | Pmovsd_ff(rd, r1) ->
       fprintf oc "	movapd	%a, %a\n" freg r1 freg rd
   | Pmovsd_fi(rd, n) ->
-      let b = camlint64_of_coqint (Floats.Float.bits_of_double n) in
+      let b = camlint64_of_coqint (Floats.Float.to_bits n) in
       let lbl = new_label() in
       fprintf oc "	movsd	%a, %a %s %.18g\n" label lbl freg rd comment (camlfloat_of_coqfloat n);
-      float_literals := (lbl, b) :: !float_literals
+      float64_literals := (lbl, b) :: !float64_literals
   | Pmovsd_fm(rd, a) ->
       fprintf oc "	movsd	%a, %a\n" addressing a freg rd
   | Pmovsd_mf(a, r1) ->
       fprintf oc "	movsd	%a, %a\n" freg r1 addressing a
-  | Pfld_f(r1) ->
-      fprintf oc "	subl	$8, %%esp\n";
-      cfi_adjust oc 8l;
-      fprintf oc "	movsd	%a, 0(%%esp)\n" freg r1;
-      fprintf oc "	fldl	0(%%esp)\n";
-      fprintf oc "	addl	$8, %%esp\n";
-      cfi_adjust oc (-8l)
-  | Pfld_m(a) ->
+  | Pmovss_fi(rd, n) ->
+      let b = camlint_of_coqint (Floats.Float32.to_bits n) in
+      let lbl = new_label() in
+      fprintf oc "	movss	%a, %a %s %.18g\n" label lbl freg rd comment (camlfloat_of_coqfloat32 n);
+      float32_literals := (lbl, b) :: !float32_literals
+  | Pmovss_fm(rd, a) ->
+      fprintf oc "	movss	%a, %a\n" addressing a freg rd
+  | Pmovss_mf(a, r1) ->
+      fprintf oc "	movss	%a, %a\n" freg r1 addressing a
+  | Pfldl_m(a) ->
       fprintf oc "	fldl	%a\n" addressing a
-  | Pfstp_f(rd) ->
-      fprintf oc "	subl	$8, %%esp\n";
-      cfi_adjust oc 8l;
-      fprintf oc "	fstpl	0(%%esp)\n";
-      fprintf oc "	movsd	0(%%esp), %a\n" freg rd;
-      fprintf oc "	addl	$8, %%esp\n";
-      cfi_adjust oc (-8l)
-  | Pfstp_m(a) ->
+  | Pfstpl_m(a) ->
       fprintf oc "	fstpl	%a\n" addressing a
+  | Pflds_m(a) ->
+      fprintf oc "	flds	%a\n" addressing a
+  | Pfstps_m(a) ->
+      fprintf oc "	fstps	%a\n" addressing a
   | Pxchg_rr(r1, r2) ->
       fprintf oc "	xchgl	%a, %a\n" ireg r1 ireg r2
   (** Moves with conversion *)
@@ -616,18 +615,18 @@ let print_instruction oc = function
       fprintf oc "	movswl	%a, %a\n" ireg16 r1 ireg rd
   | Pmovsw_rm(rd, a) ->
       fprintf oc "	movswl	%a, %a\n" addressing a ireg rd
-  | Pcvtss2sd_fm(rd, a) ->
-      fprintf oc "	cvtss2sd %a, %a\n" addressing a freg rd
   | Pcvtsd2ss_ff(rd, r1) ->
-      fprintf oc "	cvtsd2ss %a, %a\n" freg r1 freg rd;
-      fprintf oc "	cvtss2sd %a, %a\n" freg rd freg rd
-  | Pcvtsd2ss_mf(a, r1) ->
-      fprintf oc "	cvtsd2ss %a, %%xmm7\n" freg r1;
-      fprintf oc "	movss	%%xmm7, %a\n" addressing a
+      fprintf oc "	cvtsd2ss %a, %a\n" freg r1 freg rd
+  | Pcvtss2sd_ff(rd, r1) ->
+      fprintf oc "	cvtss2sd %a, %a\n" freg r1 freg rd
   | Pcvttsd2si_rf(rd, r1) ->
       fprintf oc "	cvttsd2si %a, %a\n" freg r1 ireg rd
   | Pcvtsi2sd_fr(rd, r1) ->
       fprintf oc "	cvtsi2sd %a, %a\n" ireg r1 freg rd
+  | Pcvttss2si_rf(rd, r1) ->
+      fprintf oc "	cvttss2si %a, %a\n" freg r1 ireg rd
+  | Pcvtsi2ss_fr(rd, r1) ->
+      fprintf oc "	cvtsi2ss %a, %a\n" ireg r1 freg rd
   (** Arithmetic and logical operations over integers *)
   | Plea(rd, a) ->
       fprintf oc "	leal	%a, %a\n" addressing a ireg rd
@@ -713,6 +712,24 @@ let print_instruction oc = function
       fprintf oc "	comisd	%a, %a\n" freg r2 freg r1
   | Pxorpd_f (rd) ->
       fprintf oc "	xorpd	%a, %a\n" freg rd freg rd
+  | Padds_ff(rd, r1) ->
+      fprintf oc "	addss	%a, %a\n" freg r1 freg rd
+  | Psubs_ff(rd, r1) ->
+      fprintf oc "	subss	%a, %a\n" freg r1 freg rd
+  | Pmuls_ff(rd, r1) ->
+      fprintf oc "	mulss	%a, %a\n" freg r1 freg rd
+  | Pdivs_ff(rd, r1) ->
+      fprintf oc "	divss	%a, %a\n" freg r1 freg rd
+  | Pnegs (rd) ->
+      need_masks := true;
+      fprintf oc "	xorpd	%a, %a\n" raw_symbol "__negs_mask" freg rd
+  | Pabss (rd) ->
+      need_masks := true;
+      fprintf oc "	andpd	%a, %a\n" raw_symbol "__abss_mask" freg rd
+  | Pcomiss_ff(r1, r2) ->
+      fprintf oc "	comiss	%a, %a\n" freg r2 freg r1
+  | Pxorps_f (rd) ->
+      fprintf oc "	xorpd	%a, %a\n" freg rd freg rd
   (** Branches and calls *)
   | Pjmp_l(l) ->
       fprintf oc "	jmp	%a\n" label (transl_label l)
@@ -785,8 +802,10 @@ let print_instruction oc = function
           assert false
       end
 
-let print_literal oc (lbl, n) =
+let print_literal64 oc (lbl, n) =
   fprintf oc "%a:	.quad	0x%Lx\n" label lbl n
+let print_literal32 oc (lbl, n) =
+  fprintf oc "%a:	.long	0x%lx\n" label lbl n
 
 let print_jumptable oc (lbl, tbl) =
   fprintf oc "%a:" label lbl;
@@ -796,7 +815,7 @@ let print_jumptable oc (lbl, tbl) =
 
 let print_function oc name fn =
   Hashtbl.clear current_function_labels;
-  float_literals := [];
+  float64_literals := []; float32_literals := [];
   jumptables := [];
   current_function_sig := fn.fn_sig;
   let (text, lit, jmptbl) =
@@ -818,11 +837,13 @@ let print_function oc name fn =
     fprintf oc "	.type	%a, @function\n" symbol name;
     fprintf oc "	.size	%a, . - %a\n" symbol name symbol name
   end;
-  if !float_literals <> [] then begin
+  if !float64_literals <> [] || !float32_literals <> [] then begin
     section oc lit;
     print_align oc 8;
-    List.iter (print_literal oc) !float_literals;
-    float_literals := []
+    List.iter (print_literal64 oc) !float64_literals;
+    float64_literals := [];
+    List.iter (print_literal32 oc) !float32_literals;
+    float32_literals := []
   end;
   if !jumptables <> [] then begin
     section oc jmptbl;
@@ -842,11 +863,11 @@ let print_init oc = function
       fprintf oc "	.quad	%Ld\n" (camlint64_of_coqint n)
   | Init_float32 n ->
       fprintf oc "	.long	%ld %s %.18g\n"
-	(camlint_of_coqint (Floats.Float.bits_of_single n))
+	(camlint_of_coqint (Floats.Float32.to_bits n))
 	comment (camlfloat_of_coqfloat n)
   | Init_float64 n ->
       fprintf oc "	.quad	%Ld %s %.18g\n"
-	(camlint64_of_coqint (Floats.Float.bits_of_double n))
+	(camlint64_of_coqint (Floats.Float.to_bits n))
 	comment (camlfloat_of_coqfloat n)
   | Init_space n ->
       if Z.gt n Z.zero then
@@ -917,7 +938,11 @@ let print_program oc p =
     fprintf oc "%a:	.quad   0x8000000000000000, 0\n"
                raw_symbol "__negd_mask";
     fprintf oc "%a:	.quad   0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF\n"
-               raw_symbol "__absd_mask"
+               raw_symbol "__absd_mask";
+    fprintf oc "%a:	.long   0x80000000, 0, 0, 0\n"
+               raw_symbol "__negs_mask";
+    fprintf oc "%a:	.long   0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF\n"
+               raw_symbol "__abss_mask"
   end;
   if target = MacOS then begin
     fprintf oc "	.section __IMPORT,__pointers,non_lazy_symbol_pointers\n";
diff --git a/ia32/PrintOp.ml b/ia32/PrintOp.ml
index 193779e..fb9a7cc 100644
--- a/ia32/PrintOp.ml
+++ b/ia32/PrintOp.ml
@@ -64,6 +64,7 @@ let print_operation reg pp = function
   | Omove, [r1] -> reg pp r1
   | Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n)
   | Ofloatconst n, [] -> fprintf pp "%F" (camlfloat_of_coqfloat n)
+  | Osingleconst n, [] -> fprintf pp "%Ff" (camlfloat_of_coqfloat32 n)
   | Oindirectsymbol id, [] -> fprintf pp "&%s" (extern_atom id)
   | Ocast8signed, [r1] -> fprintf pp "int8signed(%a)" reg r1
   | Ocast8unsigned, [r1] -> fprintf pp "int8unsigned(%a)" reg r1
@@ -100,6 +101,7 @@ let print_operation reg pp = function
   | Omulf, [r1;r2] -> fprintf pp "%a *f %a" reg r1 reg r2
   | Odivf, [r1;r2] -> fprintf pp "%a /f %a" reg r1 reg r2
   | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
+  | Ofloatofsingle, [r1] -> fprintf pp "floatofsingle(%a)" reg r1
   | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
   | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1
   | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
diff --git a/ia32/SelectOp.vp b/ia32/SelectOp.vp
index 214608e..b6aef72 100644
--- a/ia32/SelectOp.vp
+++ b/ia32/SelectOp.vp
@@ -346,6 +346,12 @@ Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
 Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
 Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
 
+Definition negfs (e: expr) :=  Eop Onegfs (e ::: Enil).
+Definition absfs (e: expr) :=  Eop Oabsfs (e ::: Enil).
+Definition addfs (e1 e2: expr) :=  Eop Oaddfs (e1 ::: e2 ::: Enil).
+Definition subfs (e1 e2: expr) :=  Eop Osubfs (e1 ::: e2 ::: Enil).
+Definition mulfs (e1 e2: expr) :=  Eop Omulfs (e1 ::: e2 ::: Enil).
+
 (** ** Comparisons *)
 
 Nondetfunction compimm (default: comparison -> int -> condition)
@@ -405,6 +411,9 @@ Nondetfunction compu (c: comparison) (e1: expr) (e2: expr) :=
 Definition compf (c: comparison) (e1: expr) (e2: expr) :=
   Eop (Ocmp (Ccompf c)) (e1 ::: e2 ::: Enil).
 
+Definition compfs (c: comparison) (e1: expr) (e2: expr) :=
+  Eop (Ocmp (Ccompfs c)) (e1 ::: e2 ::: Enil).
+
 (** ** Integer conversions *)
 
 Nondetfunction cast8unsigned (e: expr) := 
@@ -446,32 +455,50 @@ Nondetfunction cast16signed (e: expr) :=
 (** Floating-point conversions *)
 
 Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
+Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil).
+
 Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
 
 Nondetfunction floatofint (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.floatofint n)) Enil
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_int n)) Enil
   | _ => Eop Ofloatofint (e ::: Enil)
   end.
 
 Definition intuoffloat (e: expr) :=
   Elet e
-    (Elet (Eop (Ofloatconst (Float.floatofintu Float.ox8000_0000)) Enil)
+    (Elet (Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil)
       (Econdition (CEcond (Ccompf Clt) (Eletvar 1 ::: Eletvar 0 ::: Enil))
         (intoffloat (Eletvar 1))
         (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar 1) (Eletvar 0))))))%nat.
 
 Nondetfunction floatofintu (e: expr) :=
   match e with
-  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.floatofintu n)) Enil
+  | Eop (Ointconst n) Enil => Eop (Ofloatconst (Float.of_intu n)) Enil
   | _ =>
-    let f := Eop (Ofloatconst (Float.floatofintu Float.ox8000_0000)) Enil in
+    let f := Eop (Ofloatconst (Float.of_intu Float.ox8000_0000)) Enil in
     Elet e
       (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
         (floatofint (Eletvar O))
         (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f))
   end.
 
+Definition intofsingle (e: expr) := Eop Ointofsingle (e ::: Enil).
+
+Nondetfunction singleofint (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_int n)) Enil
+  | _ => Eop Osingleofint (e ::: Enil)
+  end.
+
+Definition intuofsingle (e: expr) := intuoffloat (floatofsingle e).
+
+Nondetfunction singleofintu (e: expr) :=
+  match e with
+  | Eop (Ointconst n) Enil => Eop (Osingleconst (Float32.of_intu n)) Enil
+  | _ => singleoffloat (floatofintu e)
+  end.
+
 (** ** Addressing modes *)
 
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
diff --git a/ia32/SelectOpproof.v b/ia32/SelectOpproof.v
index 0fd6f7b..5068862 100644
--- a/ia32/SelectOpproof.v
+++ b/ia32/SelectOpproof.v
@@ -567,6 +567,31 @@ Proof.
   red; intros; TrivialExists.
 Qed.
 
+Theorem eval_negfs: unary_constructor_sound negfs Val.negfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_absfs: unary_constructor_sound absfs Val.absfs.
+Proof.
+  red; intros. TrivialExists. 
+Qed.
+
+Theorem eval_addfs: binary_constructor_sound addfs Val.addfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+ 
+Theorem eval_subfs: binary_constructor_sound subfs Val.subfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
+Theorem eval_mulfs: binary_constructor_sound mulfs Val.mulfs.
+Proof.
+  red; intros; TrivialExists.
+Qed.
+
 Section COMP_IMM.
 
 Variable default: comparison -> int -> condition.
@@ -674,6 +699,12 @@ Proof.
   intros; red; intros. unfold compf. TrivialExists.
 Qed.
 
+Theorem eval_compfs:
+  forall c, binary_constructor_sound (compfs c) (Val.cmpfs c).
+Proof.
+  intros; red; intros. unfold compfs. TrivialExists.
+Qed.
+
 Theorem eval_cast8signed: unary_constructor_sound cast8signed (Val.sign_ext 8).
 Proof.
   red; intros until x. unfold cast8signed. case (cast8signed_match a); intros; InvEval.
@@ -711,6 +742,11 @@ Proof.
   red; intros. unfold singleoffloat. TrivialExists.
 Qed.
 
+Theorem eval_floatofsingle: unary_constructor_sound floatofsingle Val.floatofsingle.
+Proof.
+  red; intros. unfold floatofsingle. TrivialExists.
+Qed.
+
 Theorem eval_intoffloat:
   forall le a x y,
   eval_expr ge sp e m le a x ->
@@ -738,10 +774,10 @@ Theorem eval_intuoffloat:
   exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v.
 Proof.
   intros. destruct x; simpl in H0; try discriminate.
-  destruct (Float.intuoffloat f) as [n|] eqn:?; simpl in H0; inv H0.
+  destruct (Float.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
   exists (Vint n); split; auto. unfold intuoffloat.
   set (im := Int.repr Int.half_modulus).
-  set (fm := Float.floatofintu im).
+  set (fm := Float.of_intu im).
   assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar (S O)) (Vfloat f)).
     constructor. auto.
   assert (eval_expr ge sp e m (Vfloat fm :: Vfloat f :: le) (Eletvar O) (Vfloat fm)).
@@ -751,9 +787,9 @@ Proof.
   eapply eval_Econdition with (va := Float.cmp Clt f fm).
   eauto with evalexpr.
   destruct (Float.cmp Clt f fm) eqn:?.
-  exploit Float.intuoffloat_intoffloat_1; eauto. intro EQ.
+  exploit Float.to_intu_to_int_1; eauto. intro EQ.
   EvalOp. simpl. rewrite EQ; auto.
-  exploit Float.intuoffloat_intoffloat_2; eauto. 
+  exploit Float.to_intu_to_int_2; eauto. 
   change Float.ox8000_0000 with im. fold fm. intro EQ.
   set (t2 := subf (Eletvar (S O)) (Eletvar O)).
   set (t3 := intoffloat t2).
@@ -778,25 +814,75 @@ Proof.
   intros until y; unfold floatofintu. case (floatofintu_match a); intros.
   InvEval. TrivialExists.
   destruct x; simpl in H0; try discriminate. inv H0.
-  exists (Vfloat (Float.floatofintu i)); split; auto.
+  exists (Vfloat (Float.of_intu i)); split; auto.
   econstructor. eauto. 
-  set (fm := Float.floatofintu Float.ox8000_0000).
+  set (fm := Float.of_intu Float.ox8000_0000).
   assert (eval_expr ge sp e m (Vint i :: le) (Eletvar O) (Vint i)).
     constructor. auto. 
   eapply eval_Econdition with (va := Int.ltu i Float.ox8000_0000).
   eauto with evalexpr.
   destruct (Int.ltu i Float.ox8000_0000) eqn:?.
-  rewrite Float.floatofintu_floatofint_1; auto.
+  rewrite Float.of_intu_of_int_1; auto.
   unfold floatofint. EvalOp. 
   exploit (eval_addimm (Int.neg Float.ox8000_0000) (Vint i :: le) (Eletvar 0)); eauto.
   simpl. intros [v [A B]]. inv B. 
   unfold addf. EvalOp. 
   constructor. unfold floatofint. EvalOp. simpl; eauto. 
   constructor. EvalOp. simpl; eauto. constructor. simpl; eauto. 
-  fold fm. rewrite Float.floatofintu_floatofint_2; auto.
+  fold fm. rewrite Float.of_intu_of_int_2; auto.
   rewrite Int.sub_add_opp. auto.
 Qed.
 
+Theorem eval_intofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros; unfold intofsingle. TrivialExists. 
+Qed.
+
+Theorem eval_singleofint:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofint x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofint a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval.
+  TrivialExists.
+  TrivialExists. 
+Qed.
+
+Theorem eval_intuofsingle:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.intuofsingle x = Some y ->
+  exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v.
+Proof.
+  intros. destruct x; simpl in H0; try discriminate.
+  destruct (Float32.to_intu f) as [n|] eqn:?; simpl in H0; inv H0.
+  unfold intuofsingle. apply eval_intuoffloat with (Vfloat (Float.of_single f)).
+  unfold floatofsingle. EvalOp. 
+  simpl. change (Float.of_single f) with (Float32.to_double f). 
+  erewrite Float32.to_intu_double; eauto. auto. 
+Qed.
+
+Theorem eval_singleofintu:
+  forall le a x y,
+  eval_expr ge sp e m le a x ->
+  Val.singleofintu x = Some y ->
+  exists v, eval_expr ge sp e m le (singleofintu a) v /\ Val.lessdef y v.
+Proof.
+  intros until y; unfold singleofintu. case (singleofintu_match a); intros.
+  InvEval. TrivialExists.
+  destruct x; simpl in H0; try discriminate. inv H0.
+  exploit eval_floatofintu. eauto. simpl. reflexivity. 
+  intros (v & A & B).
+  exists (Val.singleoffloat v); split.
+  unfold singleoffloat; EvalOp.
+  inv B; simpl. rewrite Float32.of_intu_double. auto. 
+Qed.
+
 Theorem eval_addressing:
   forall le chunk a v b ofs,
   eval_expr ge sp e m le a v ->
diff --git a/ia32/Unusedglob1.ml b/ia32/Unusedglob1.ml
index 8332a30..eb0298b 100644
--- a/ia32/Unusedglob1.ml
+++ b/ia32/Unusedglob1.ml
@@ -29,12 +29,14 @@ let referenced_builtin ef =
 
 let referenced_instr = function
   | Pmov_rm (_, a) | Pmov_mr (a, _)
+  | Pmov_rm_a (_, a) | Pmov_mr_a (a, _)
   | Pmovsd_fm (_, a) | Pmovsd_mf(a, _)
-  | Pfld_m a | Pfstp_m a
+  | Pmovss_fm (_, a) | Pmovss_mf(a, _)
+  | Pfldl_m a | Pflds_m a | Pfstpl_m a | Pfstps_m a
   | Pmovb_mr (a, _) | Pmovw_mr (a, _)
   | Pmovzb_rm (_, a) | Pmovsb_rm (_, a)
   | Pmovzw_rm (_, a) | Pmovsw_rm (_, a)
-  | Pcvtss2sd_fm (_, a) | Pcvtsd2ss_mf (a, _) | Plea (_, a) -> referenced_addr a
+  | Plea (_, a) -> referenced_addr a
   | Pjmp_s(s, _) -> [s]
   | Pcall_s(s, _) -> [s]
   | Pbuiltin(ef, args, res) -> referenced_builtin ef
diff --git a/ia32/ValueAOp.v b/ia32/ValueAOp.v
index 58b945f..874c2be 100644
--- a/ia32/ValueAOp.v
+++ b/ia32/ValueAOp.v
@@ -32,6 +32,8 @@ Definition eval_static_condition (cond: condition) (vl: list aval): abool :=
   | Ccompuimm c n, v1 :: nil => cmpu_bool c v1 (I n)
   | Ccompf c, v1 :: v2 :: nil => cmpf_bool c v1 v2
   | Cnotcompf c, v1 :: v2 :: nil => cnot (cmpf_bool c v1 v2)
+  | Ccompfs c, v1 :: v2 :: nil => cmpfs_bool c v1 v2
+  | Cnotcompfs c, v1 :: v2 :: nil => cnot (cmpfs_bool c v1 v2)
   | Cmaskzero n, v1 :: nil => maskzero v1 n
   | Cmasknotzero n, v1 :: nil => cnot (maskzero v1 n)
   | _, _ => Bnone
@@ -55,6 +57,7 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Omove, v1::nil => v1
   | Ointconst n, nil => I n
   | Ofloatconst n, nil => if propagate_float_constants tt then F n else ftop
+  | Osingleconst n, nil => if propagate_float_constants tt then FS n else ftop
   | Oindirectsymbol id, nil => Ptr (Gl id Int.zero)
   | Ocast8signed, v1 :: nil => sign_ext 8 v1
   | Ocast8unsigned, v1 :: nil => zero_ext 8 v1
@@ -93,9 +96,18 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval :=
   | Osubf, v1::v2::nil => subf v1 v2
   | Omulf, v1::v2::nil => mulf v1 v2
   | Odivf, v1::v2::nil => divf v1 v2
+  | Onegfs, v1::nil => negfs v1
+  | Oabsfs, v1::nil => absfs v1
+  | Oaddfs, v1::v2::nil => addfs v1 v2
+  | Osubfs, v1::v2::nil => subfs v1 v2
+  | Omulfs, v1::v2::nil => mulfs v1 v2
+  | Odivfs, v1::v2::nil => divfs v1 v2
   | Osingleoffloat, v1::nil => singleoffloat v1
+  | Ofloatofsingle, v1::nil => floatofsingle v1
   | Ointoffloat, v1::nil => intoffloat v1
   | Ofloatofint, v1::nil => floatofint v1
+  | Ointofsingle, v1::nil => intofsingle v1
+  | Osingleofint, v1::nil => singleofint v1
   | Omakelong, v1::v2::nil => longofwords v1 v2
   | Olowlong, v1::nil => loword v1
   | Ohighlong, v1::nil => hiword v1
@@ -164,6 +176,7 @@ Proof.
   unfold eval_operation, eval_static_operation; intros;
   destruct op; InvHyps; eauto with va.
   destruct (propagate_float_constants tt); constructor.
+  destruct (propagate_float_constants tt); constructor.
   eapply eval_static_addressing_sound; eauto.
   apply of_optbool_sound. eapply eval_static_condition_sound; eauto. 
 Qed.
diff --git a/ia32/standard/Conventions1.v b/ia32/standard/Conventions1.v
index e097e85..d1f7acd 100644
--- a/ia32/standard/Conventions1.v
+++ b/ia32/standard/Conventions1.v
@@ -15,6 +15,7 @@
 
 Require Import Coqlib.
 Require Import AST.
+Require Import Events.
 Require Import Locations.
 
 (** * Classification of machine registers *)
@@ -161,13 +162,13 @@ Proof.
 Qed.
 
 Lemma int_callee_save_type:
-  forall r, In r int_callee_save_regs -> mreg_type r = Tint.
+  forall r, In r int_callee_save_regs -> mreg_type r = Tany32.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
 
 Lemma float_callee_save_type:
-  forall r, In r float_callee_save_regs -> mreg_type r = Tfloat.
+  forall r, In r float_callee_save_regs -> mreg_type r = Tany64.
 Proof.
   intro. simpl; ElimOrEq; reflexivity.
 Qed.
@@ -219,25 +220,20 @@ Qed.
 Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
   | None => AX :: nil
-  | Some Tint => AX :: nil
+  | Some (Tint | Tany32) => AX :: nil
   | Some (Tfloat | Tsingle) => FP0 :: nil
+  | Some Tany64 => X0 :: nil
   | Some Tlong => DX :: AX :: nil
   end.
 
-(*
-(** The result location has the type stated in the signature. *)
+(** The result registers have types compatible with that given in the signature. *)
 
 Lemma loc_result_type:
   forall sig,
-  mreg_type (loc_result sig) =
-  match sig.(sig_res) with None => Tint | Some ty => ty end.
+  subtype_list (proj_sig_res' sig) (map mreg_type (loc_result sig)) = true.
 Proof.
-  intros; unfold loc_result.
-  destruct (sig_res sig). 
-  destruct t; reflexivity.
-  reflexivity.
+  intros. unfold proj_sig_res', loc_result. destruct (sig_res sig) as [[]|]; auto.
 Qed.
-*)
 
 (** The result locations are caller-save registers *)
 
@@ -246,9 +242,9 @@ Lemma loc_result_caller_save:
   In r (loc_result s) -> In r destroyed_at_call.
 Proof.
   intros.
-  assert (r = AX \/ r = DX \/ r = FP0).
-    unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition.
-  destruct H0 as [A | [A | A]]; subst r; simpl; OrEq.
+  assert (r = AX \/ r = DX \/ r = FP0 \/ r = X0).
+    unfold loc_result in H. destruct (sig_res s) as [[]|]; simpl in H; intuition.
+  destruct H0 as [A | [A | [A | A]]]; subst r; simpl; OrEq.
 Qed.
 
 (** ** Location of function arguments *)
@@ -263,6 +259,8 @@ Fixpoint loc_arguments_rec
   | Tfloat :: tys => S Outgoing ofs Tfloat :: loc_arguments_rec tys (ofs + 2)
   | Tsingle :: tys => S Outgoing ofs Tsingle :: loc_arguments_rec tys (ofs + 1)
   | Tlong :: tys => S Outgoing (ofs + 1) Tint :: S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 2)
+  | Tany32 :: tys => S Outgoing ofs Tany32 :: loc_arguments_rec tys (ofs + 1)
+  | Tany64 :: tys => S Outgoing ofs Tany64 :: loc_arguments_rec tys (ofs + 2)
   end.
 
 (** [loc_arguments s] returns the list of locations where to store arguments
@@ -303,21 +301,18 @@ Remark loc_arguments_rec_charact:
   end.
 Proof.
   induction tyl; simpl loc_arguments_rec; intros.
-  destruct H.
-  destruct a.
-- destruct H. subst l. split. omega. congruence.
-  exploit IHtyl; eauto.
-  destruct l; auto. destruct sl; auto. intuition omega.
-- destruct H. subst l. split. omega. congruence.
-  exploit IHtyl; eauto.
-  destruct l; auto. destruct sl; auto. intuition omega.
-- destruct H. subst l; split; [omega|congruence].
-  destruct H. subst l; split; [omega|congruence].
-  exploit IHtyl; eauto.
-  destruct l; auto. destruct sl; auto. intuition omega.
-- destruct H. subst l. split. omega. congruence.
-  exploit IHtyl; eauto.
-  destruct l; auto. destruct sl; auto. intuition omega.
+- destruct H.
+- assert (REC: forall ofs1, In l (loc_arguments_rec tyl ofs1) -> ofs1 > ofs -> 
+               match l with
+               | R _ => False
+               | S Local _ _ => False
+               | S Incoming _ _ => False
+               | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong
+               end).
+  { intros. exploit IHtyl; eauto. destruct l; auto. destruct sl; intuition omega
+. }
+  destruct a; simpl in H; repeat (destruct H);
+  ((eapply REC; eauto; omega) || (split; [omega|congruence])).
 Qed.
 
 Lemma loc_arguments_acceptable:
@@ -357,16 +352,15 @@ Lemma loc_arguments_bounded:
 Proof.
   intros until ty. unfold loc_arguments, size_arguments. generalize (sig_args s) 0.
   induction l; simpl; intros.
-  destruct H.
-  destruct a.
-  destruct H. inv H. apply size_arguments_rec_above. auto.
-  destruct H. inv H. apply size_arguments_rec_above. auto.
-  destruct H. inv H.
+- contradiction.
+- Ltac decomp :=
+  match goal with
+  | [ H: _ \/ _ |- _ ] => destruct H; decomp
+  | [ H: S _ _ _ = S _ _ _ |- _ ] => inv H
+  | _ => idtac
+  end.
+  destruct a; simpl in H; decomp; auto; try apply size_arguments_rec_above.
   simpl typesize. replace (z + 1 + 1) with (z + 2) by omega. apply size_arguments_rec_above.
-  destruct H. inv H. 
-  simpl typesize. apply Zle_trans with (ofs + 2). omega. apply size_arguments_rec_above. 
-  auto.
-  destruct H. inv H. apply size_arguments_rec_above. auto.
+  simpl typesize. apply Zle_trans with (ofs + 2). omega. apply size_arguments_rec_above.
 Qed.
 
-
author	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2014-07-23 08:54:56 +0000
committer	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2014-07-23 08:54:56 +0000
commit	2a0168fea37b68ad14e2cb60bf215111e49d4870 (patch)
tree	2f59373790d8ce3a5df66ef7a692271cf0666c6c /ia32
parent	00805153cf9b88aa07cc6694b17d93f5ba2e7de8 (diff)