From 76ea1108be6f8b4ba9dc0118a13f685bcb62bc2b Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Sat, 14 Sep 2013 16:24:30 +0000
Subject: Floats.v, Nan.v: hard-wire the general shape of binop_pl, so that no
 axioms are necessary, only two parameters (default_pl and choose_binop_pl).
 SelectDiv: optimize FP division by a power of 2. ConstpropOp: optimize 2.0 *
 x and x * 2.0 into x + x.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2326 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 Changelog                  |   5 ++
 arm/ConstpropOp.vp         |   7 +++
 arm/ConstpropOpproof.v     |  32 +++++++++++
 arm/Nan.v                  |  47 +++++++--------
 backend/Constprop.v        |   4 +-
 backend/Constpropproof.v   |  12 ++--
 backend/SelectDiv.vp       |  18 +++++-
 backend/SelectDivproof.v   |  18 ++++++
 extraction/extraction.v    |   6 +-
 ia32/ConstpropOp.vp        |  13 ++++-
 ia32/ConstpropOpproof.v    |  36 ++++++++++++
 ia32/Nan.v                 |  38 ++++++------
 ia32/SelectOp.vp           |  13 ++++-
 ia32/SelectOpproof.v       |   9 ++-
 lib/Floats.v               | 140 ++++++++++++++++++++++++++++++++++++++-------
 powerpc/ConstpropOp.vp     |   7 +++
 powerpc/ConstpropOpproof.v |  32 +++++++++++
 powerpc/Nan.v              |  38 ++++++------
 powerpc/SelectOp.vp        |   1 -
 powerpc/SelectOpproof.v    |   5 --
 20 files changed, 374 insertions(+), 107 deletions(-)

diff --git a/Changelog b/Changelog
index 5f0c7c7..b603c32 100644
--- a/Changelog
+++ b/Changelog
@@ -5,8 +5,13 @@ Development trunk:
   arithmetic.
 - Optimize integer divisions by positive constants, turning them into
   multiply-high and shifts.
+- Optimize floating-point divisions by powers of 2, turning them
+  into multiplications.
+- Optimize "x * 2.0" and "2.0 * x" into "x + x".
 - Avoid double rounding issues in conversion from 64-bit integers
   to single-precision floats.
+- Fixed typing rules for __builtin_annot() to avoid casting arguments
+  of small integer or FP types.
 - PowerPC: more efficient implementation of division on 64-bit integers.
 - Minor simplifications in the generic solvers for dataflow analysis.
 
diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp
index caf0da6..47d2086 100644
--- a/arm/ConstpropOp.vp
+++ b/arm/ConstpropOp.vp
@@ -287,6 +287,11 @@ Definition make_xorimm (n: int) (r: reg) :=
   else if Int.eq n Int.mone then (Onot, r :: nil)
   else (Oxorimm n, r :: nil).
 
+Definition make_mulfimm (n: float) (r r1 r2: reg) :=
+  if Float.eq_dec n (Float.floatofint (Int.repr 2))
+  then (Oaddf, r :: r :: nil)
+  else (Omulf, r1 :: r2 :: nil).
+
 Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list approx) :=
   match op, args, vl with
@@ -317,6 +322,8 @@ Nondetfunction op_strength_reduction
   | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
   | Ocmp c, args, vl =>
       let (c', args') := cond_strength_reduction c args vl in (Ocmp c', args')
+  | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
+  | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
 
diff --git a/arm/ConstpropOpproof.v b/arm/ConstpropOpproof.v
index 687e08f..f0d6b7f 100644
--- a/arm/ConstpropOpproof.v
+++ b/arm/ConstpropOpproof.v
@@ -372,6 +372,33 @@ Proof.
   econstructor; split; eauto. auto. 
 Qed.
 
+Lemma make_mulfimm_correct:
+  forall n r1 r2,
+  rs#r2 = Vfloat n ->
+  let (op, args) := make_mulfimm n r1 r1 r2 in
+  exists v, eval_operation ge sp op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfimm. 
+  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r1); simpl; auto. rewrite Float.mul2_add; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
+Lemma make_mulfimm_correct_2:
+  forall n r1 r2,
+  rs#r1 = Vfloat n ->
+  let (op, args) := make_mulfimm n r2 r1 r2 in
+  exists v, eval_operation ge sp op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfimm. 
+  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r2); simpl; auto. rewrite Float.mul2_add; auto. 
+  rewrite Float.mul_commut; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = approx_regs app args ->
@@ -431,6 +458,11 @@ Proof.
   generalize (cond_strength_reduction_correct c args0 vl0). 
   destruct (cond_strength_reduction c args0 vl0) as [c' args']; intros.
   rewrite <- H1 in H0; auto. econstructor; split; eauto.
+(* mulf *)
+  inv H0. assert (rs#r2 = Vfloat n2). InvApproxRegs; SimplVMA; auto.
+  apply make_mulfimm_correct; auto.
+  inv H0. assert (rs#r1 = Vfloat n1). InvApproxRegs; SimplVMA; auto.
+  apply make_mulfimm_correct_2; auto.
 (* default *)
   exists v; auto.
 Qed.
diff --git a/arm/Nan.v b/arm/Nan.v
index d66d6ec..e2bddf6 100644
--- a/arm/Nan.v
+++ b/arm/Nan.v
@@ -1,34 +1,29 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Jacques-Henri Jourdan, INRIA Paris-Rocquencourt            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
 Require Import Fappli_IEEE.
 Require Import Fappli_IEEE_bits.
 Require Import Floats.
 Require Import ZArith.
 Require Import Integers.
 
-(* Needed to break a circular reference after extraction *)
-Definition transform_quiet_pl :=
-  Eval unfold Float.transform_quiet_pl in Float.transform_quiet_pl.
-
 Program Definition default_pl : bool * nan_pl 53 := (false, nat_iter 51 xO xH).
 
-Definition binop_pl (pl1 pl2:binary64) : bool*nan_pl 53 :=
-  match pl1, pl2 with
-    | B754_nan s1 pl1, B754_nan s2 pl2 =>
-      if (Pos.testbit (proj1_sig pl1) 51 && (* pl2 is sNan but pl1 is qNan *)
-         negb (Pos.testbit (proj1_sig pl2) 51))%bool
-      then (s2, transform_quiet_pl pl2)
-      else (s1, transform_quiet_pl pl1)
-    | B754_nan s1 pl1, _ => (s1, transform_quiet_pl pl1)
-    | _, B754_nan s2 pl2 => (s2, transform_quiet_pl pl2)
-    | _, _ => default_pl
-  end.
-
-Theorem binop_propagate1: Float.binop_propagate1_prop binop_pl.
-Proof.
-  repeat intro. destruct f1, f2; try discriminate; simpl;
-  destruct Pos.testbit; reflexivity.
-Qed.
-
-Theorem binop_propagate2: Float.binop_propagate2_prop binop_pl.
-Proof.
-  repeat intro. destruct f1, f2, f3; try discriminate; simpl; reflexivity.
-Qed.
+Definition choose_binop_pl (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+  (** Choose second NaN if pl2 is sNaN but pl1 is qNan.
+      In all other cases, choose first NaN *)
+  (Pos.testbit (proj1_sig pl1) 51 &&
+   negb (Pos.testbit (proj1_sig pl2) 51))%bool.
diff --git a/backend/Constprop.v b/backend/Constprop.v
index 24573a5..e5ea64d 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -322,7 +322,9 @@ Function annot_strength_reduction
       let (targs'', args'') := annot_strength_reduction app targs' args' in
       match ty, approx_reg app arg with
       | Tint, I n => (AA_int n :: targs'', args'')
-      | Tfloat, F n => (AA_float n :: targs'', args'')
+      | Tfloat, F n => if generate_float_constants tt
+                       then (AA_float n :: targs'', args'')
+                       else (AA_arg ty :: targs'', arg :: args'')
       | _, _ => (AA_arg ty :: targs'', arg :: args'')
       end
   | targ :: targs', _ =>
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index 328f724..898c4df 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -517,10 +517,14 @@ Proof.
       exists (ev1 :: eargs''); split.
       simpl; constructor; auto. simpl. congruence.
     }
-    destruct ty; destruct (approx_reg app arg) as [] eqn:E2; inv H; auto;
-    exists eargs''; split; auto; simpl; f_equal; auto;
-    generalize (MATCH arg); rewrite E2; simpl; intros E3;
-    rewrite E3 in H5; inv H5; auto.
+    destruct ty; destruct (approx_reg app arg) as [] eqn:E2; inv H; auto.
+    * exists eargs''; split; auto; simpl; f_equal; auto.
+      generalize (MATCH arg); rewrite E2; simpl; intros E3;
+      rewrite E3 in H5; inv H5; auto.
+    * destruct (generate_float_constants tt); inv H1; auto. 
+      exists eargs''; split; auto; simpl; f_equal; auto.
+      generalize (MATCH arg); rewrite E2; simpl; intros E3;
+      rewrite E3 in H5; inv H5; auto.
   + destruct (annot_strength_reduction app targs args) as [targs'' args''] eqn:E.
     inv H.
     exploit IHtargs; eauto. intros [eargs'' [A B]].
diff --git a/backend/SelectDiv.vp b/backend/SelectDiv.vp
index e60a97d..1d9168c 100644
--- a/backend/SelectDiv.vp
+++ b/backend/SelectDiv.vp
@@ -10,7 +10,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Instruction selection for integer division and modulus *)
+(** Instruction selection for division and modulus *)
 
 Require Import Coqlib.
 Require Import AST.
@@ -154,3 +154,19 @@ Nondetfunction mods (e1: expr) (e2: expr) :=
   | _ => mods_base e1 e2
   end.
 
+(** Floating-point division by a constant can also be turned into a FP
+    multiplication by the inverse constant, but only for powers of 2. *)
+
+Definition divfimm (e: expr) (n: float) :=
+  match Float.exact_inverse n with
+  | Some n' => Eop Omulf (e ::: Eop (Ofloatconst n') Enil ::: Enil)
+  | None    => Eop Odivf (e ::: Eop (Ofloatconst n) Enil ::: Enil)
+  end.
+
+Nondetfunction divf (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Ofloatconst n2) Enil => divfimm e1 n2
+  | _ => Eop Odivf (e1 ::: e2 ::: Enil)
+  end.
+
+
diff --git a/backend/SelectDivproof.v b/backend/SelectDivproof.v
index 4d8f96a..0719a5e 100644
--- a/backend/SelectDivproof.v
+++ b/backend/SelectDivproof.v
@@ -544,4 +544,22 @@ Proof.
 - eapply eval_mods_base; eauto. 
 Qed.
 
+(** * Floating-point division *)
+
+Theorem eval_divf:
+  forall le a b x y,
+  eval_expr ge sp e m le a x ->
+  eval_expr ge sp e m le b y ->
+  exists v, eval_expr ge sp e m le (divf a b) v /\ Val.lessdef (Val.divf x y) v.
+Proof.
+  intros until y. unfold divf. destruct (divf_match b); intros.
+- unfold divfimm. destruct (Float.exact_inverse n2) as [n2' | ] eqn:EINV.
+  + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
+    EvalOp. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor. 
+    simpl; eauto. 
+    destruct x; simpl; auto. erewrite Float.div_mul_inverse; eauto. 
+  + TrivialExists. 
+- TrivialExists.
+Qed.
+
 End CMCONSTRS.
diff --git a/extraction/extraction.v b/extraction/extraction.v
index ba1ca58..047a9b4 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -32,8 +32,8 @@ Require Import ExtrOcamlString.
 Extraction Inline Wfsimpl.Fix Wfsimpl.Fixm.
 
 (* Floats *)
-Extract Constant Floats.Float.binop_pl =>
-  "Nan.binop_pl".
+Extract Constant Floats.Float.default_pl => "Nan.default_pl".
+Extract Constant Floats.Float.choose_binop_pl => "Nan.choose_binop_pl".
 
 (* AST *)
 Extract Constant AST.ident_of_string =>
@@ -135,4 +135,4 @@ Separate Extraction
    RTL.instr_defs RTL.instr_uses
    Machregs.mregs_for_operation Machregs.mregs_for_builtin
    Machregs.two_address_op
-   Nan.binop_pl.
+   Nan.default_pl Nan.choose_binop_pl.
diff --git a/ia32/ConstpropOp.vp b/ia32/ConstpropOp.vp
index a29b450..8c3a7fa 100644
--- a/ia32/ConstpropOp.vp
+++ b/ia32/ConstpropOp.vp
@@ -291,20 +291,25 @@ Definition make_moduimm n (r1 r2: reg) :=
   | None   => (Omodu, r1 :: r2 :: nil)
   end.
 
-(** We must be careful to preserve 2-address constraints over the
-    RTL code, which means that commutative operations cannot
-    be specialized if their first argument is a constant. *)
+Definition make_mulfimm (n: float) (r r1 r2: reg) :=
+  if Float.eq_dec n (Float.floatofint (Int.repr 2))
+  then (Oaddf, r :: r :: nil)
+  else (Omulf, r1 :: r2 :: nil).
 
 Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list approx) :=
   match op, args, vl with
   | Osub, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_addimm (Int.neg n2) r1
+  | Omul, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_mulimm n1 r2
   | Omul, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_mulimm n2 r1
   | Odiv, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divimm n2 r1 r2
   | Odivu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_divuimm n2 r1 r2
   | Omodu, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_moduimm n2 r1 r2
+  | Oand, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_andimm n1 r2
   | Oand, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_andimm n2 r1
+  | Oor, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_orimm n1 r2
   | Oor, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_orimm n2 r1
+  | Oxor, r1 :: r2 :: nil, I n1 :: v2 :: nil => make_xorimm n1 r2
   | Oxor, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_xorimm n2 r1
   | Oshl, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shlimm n2 r1
   | Oshr, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shrimm n2 r1
@@ -315,6 +320,8 @@ Nondetfunction op_strength_reduction
   | Ocmp c, args, vl =>
       let (c', args') := cond_strength_reduction c args vl in 
       (Ocmp c', args')
+  | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
+  | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
 
diff --git a/ia32/ConstpropOpproof.v b/ia32/ConstpropOpproof.v
index b6c3cdc..a4cb402 100644
--- a/ia32/ConstpropOpproof.v
+++ b/ia32/ConstpropOpproof.v
@@ -380,6 +380,33 @@ Proof.
   econstructor; split; eauto. auto. 
 Qed.
 
+Lemma make_mulfimm_correct:
+  forall n r1 r2,
+  rs#r2 = Vfloat n ->
+  let (op, args) := make_mulfimm n r1 r1 r2 in
+  exists v, eval_operation ge sp op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfimm. 
+  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r1); simpl; auto. rewrite Float.mul2_add; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
+Lemma make_mulfimm_correct_2:
+  forall n r1 r2,
+  rs#r1 = Vfloat n ->
+  let (op, args) := make_mulfimm n r2 r1 r2 in
+  exists v, eval_operation ge sp op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfimm. 
+  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r2); simpl; auto. rewrite Float.mul2_add; auto. 
+  rewrite Float.mul_commut; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = approx_regs app args ->
@@ -392,6 +419,7 @@ Proof.
 (* sub *)
   InvApproxRegs. SimplVMA. inv H0; rewrite H. rewrite Val.sub_add_opp. apply make_addimm_correct; auto. 
 (* mul *)
+  InvApproxRegs. SimplVMA. inv H0; rewrite H1. rewrite Val.mul_commut. apply make_mulimm_correct; auto.
   InvApproxRegs. SimplVMA. inv H0; rewrite H. apply make_mulimm_correct; auto.
 (* divs *) 
   assert (rs#r2 = Vint n2). clear H0. InvApproxRegs; SimplVMA; auto.
@@ -403,10 +431,13 @@ Proof.
   assert (rs#r2 = Vint n2). clear H0. InvApproxRegs; SimplVMA; auto.
   apply make_moduimm_correct; auto.
 (* and *)
+  InvApproxRegs. SimplVMA. inv H0; rewrite H1. rewrite Val.and_commut. apply make_andimm_correct; auto.
   InvApproxRegs. SimplVMA. inv H0; rewrite H. apply make_andimm_correct; auto.
 (* or *)
+  InvApproxRegs. SimplVMA. inv H0; rewrite H1. rewrite Val.or_commut. apply make_orimm_correct; auto.
   InvApproxRegs. SimplVMA. inv H0; rewrite H. apply make_orimm_correct; auto.
 (* xor *)
+  InvApproxRegs. SimplVMA. inv H0; rewrite H1. rewrite Val.xor_commut. apply make_xorimm_correct; auto.
   InvApproxRegs. SimplVMA. inv H0; rewrite H. apply make_xorimm_correct; auto.
 (* shl *)
   InvApproxRegs. SimplVMA. inv H0; rewrite H. apply make_shlimm_correct; auto.
@@ -422,6 +453,11 @@ Proof.
   generalize (cond_strength_reduction_correct c args0 vl0 H). 
   destruct (cond_strength_reduction c args0 vl0) as [c' args']; intros.
   rewrite <- H1 in H0; auto. econstructor; split; eauto.
+(* mulf *)
+  inv H0. assert (rs#r2 = Vfloat n2). InvApproxRegs; SimplVMA; auto.
+  apply make_mulfimm_correct; auto.
+  inv H0. assert (rs#r1 = Vfloat n1). InvApproxRegs; SimplVMA; auto.
+  apply make_mulfimm_correct_2; auto.
 (* default *)
   exists v; auto.
 Qed.
diff --git a/ia32/Nan.v b/ia32/Nan.v
index dadf0ca..f3e777e 100644
--- a/ia32/Nan.v
+++ b/ia32/Nan.v
@@ -1,28 +1,26 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Jacques-Henri Jourdan, INRIA Paris-Rocquencourt            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
 Require Import Fappli_IEEE.
 Require Import Fappli_IEEE_bits.
 Require Import Floats.
 Require Import ZArith.
 Require Import Integers.
 
-(* Needed to break a circular reference after extraction *)
-Definition transform_quiet_pl :=
-  Eval unfold Float.transform_quiet_pl in Float.transform_quiet_pl.
-
 Program Definition default_pl : bool * nan_pl 53 := (true, nat_iter 51 xO xH).
 
-Definition binop_pl (pl1 pl2:binary64) : bool*nan_pl 53 :=
-  match pl1, pl2 with
-    | B754_nan s1 pl1, _ => (s1, transform_quiet_pl pl1)
-    | _, B754_nan s2 pl2 => (s2, transform_quiet_pl pl2)
-    | _, _ => default_pl
-  end.
-
-Theorem binop_propagate1: Float.binop_propagate1_prop binop_pl.
-Proof.
-  repeat intro. destruct f1, f2; try discriminate; simpl; reflexivity.
-Qed.
-
-Theorem binop_propagate2: Float.binop_propagate2_prop binop_pl.
-Proof.
-  repeat intro. destruct f1, f2, f3; try discriminate; simpl; reflexivity.
-Qed.
+Definition choose_binop_pl (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+  false.                        (** always choose first NaN *)
diff --git a/ia32/SelectOp.vp b/ia32/SelectOp.vp
index 209147e..1471405 100644
--- a/ia32/SelectOp.vp
+++ b/ia32/SelectOp.vp
@@ -334,7 +334,18 @@ Definition absf (e: expr) :=  Eop Oabsf (e ::: Enil).
 Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
 Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
 Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
-Definition divf (e1 e2: expr) :=  Eop Odivf (e1 ::: e2 ::: Enil).
+
+Definition divfimm (e: expr) (n: float) :=
+  match Float.exact_inverse n with
+  | Some n' => Eop Omulf (e ::: Eop (Ofloatconst n') Enil ::: Enil)
+  | None    => Eop Odivf (e ::: Eop (Ofloatconst n) Enil ::: Enil)
+  end.
+
+Nondetfunction divf (e1: expr) (e2: expr) :=
+  match e2 with
+  | Eop (Ofloatconst n2) Enil => divfimm e1 n2
+  | _ => Eop Odivf (e1 ::: e2 ::: Enil)
+  end.
 
 (** ** Comparisons *)
 
diff --git a/ia32/SelectOpproof.v b/ia32/SelectOpproof.v
index 85802b6..cec3b59 100644
--- a/ia32/SelectOpproof.v
+++ b/ia32/SelectOpproof.v
@@ -576,7 +576,14 @@ Qed.
 
 Theorem eval_divf: binary_constructor_sound divf Val.divf.
 Proof.
-  red; intros; TrivialExists.
+  red. intros until y. unfold divf. destruct (divf_match b); intros.
+- unfold divfimm. destruct (Float.exact_inverse n2) as [n2' | ] eqn:EINV.
+  + inv H0. inv H4. simpl in H6. inv H6. econstructor; split.
+    EvalOp. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor. 
+    simpl; eauto. 
+    destruct x; simpl; auto. erewrite Float.div_mul_inverse; eauto. 
+  + TrivialExists. 
+- TrivialExists.
 Qed.
 
 Section COMP_IMM.
diff --git a/lib/Floats.v b/lib/Floats.v
index 0151cf0..0a30836 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -260,23 +260,27 @@ Definition singleoflong (n:int64): float := (**r conversion from signed 64-bit i
 Definition singleoflongu (n:int64): float:= (**r conversion from unsigned 64-bit int to single-precision float *)
   floatofbinary32 (binary_normalize32 (Int64.unsigned n) 0 false).
 
-Parameter binop_pl: binary64 -> binary64 -> bool*nan_pl 53.
-
-Definition binop_propagate1_prop binop_pl :=
-  forall f1 f2:float, is_nan _ _ f1 = true -> is_nan _ _ f2 = false ->
-    binop_pl f1 f1 = (binop_pl f1 f2 : bool*nan_pl 53).
-
-
-(* Proved in Nan.v for different architectures *)
-Hypothesis binop_propagate1: binop_propagate1_prop binop_pl.
-
-Definition binop_propagate2_prop binop_pl :=
-  forall f1 f2 f3:float, is_nan _ _ f1 = true ->
-                         is_nan _ _ f2 = false -> is_nan _ _ f3 = false ->
-    binop_pl f1 f2 = (binop_pl f1 f3 : bool*nan_pl 53).
-
-(* Proved in Nan.v for different architectures *)
-Hypothesis binop_propagate2: binop_propagate2_prop binop_pl.
+(* The Nan payload operations for two-argument arithmetic operations are not part of
+   the IEEE754 standard, but all architectures of Compcert share a similar
+   NaN behavior, parameterized by:
+- a "default" payload which occurs when an operation generates a NaN from
+  non-NaN arguments;
+- a choice function determining which of the payload arguments to choose,
+  when an operation is given two NaN arguments. *)
+
+Parameter default_pl : bool*nan_pl 53.
+Parameter choose_binop_pl : bool -> nan_pl 53 -> bool -> nan_pl 53 -> bool.
+
+Definition binop_pl (x y: binary64) : bool*nan_pl 53 :=
+  match x, y with
+  | B754_nan s1 pl1, B754_nan s2 pl2 =>
+      if choose_binop_pl s1 pl1 s2 pl2
+      then (s2, transform_quiet_pl pl2)
+      else (s1, transform_quiet_pl pl1)
+  | B754_nan s1 pl1, _ => (s1, transform_quiet_pl pl1)
+  | _, B754_nan s2 pl2 => (s2, transform_quiet_pl pl2)
+  | _, _ => default_pl
+  end.
 
 Definition add: float -> float -> float := b64_plus binop_pl mode_NE. (**r addition *)
 Definition sub: float -> float -> float := b64_minus binop_pl mode_NE. (**r subtraction *)
@@ -496,6 +500,57 @@ Proof.
   right; red; intros; elim n. apply singleoffloat_of_single; auto.
 Defined.
 
+(** Commutativity properties of addition and multiplication. *)
+
+Theorem add_commut:
+  forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> add x y = add y x.
+Proof.
+  intros x y NAN. unfold add, b64_plus. 
+  pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y).
+  pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE y x).
+  unfold Bplus in *; destruct x; destruct y; auto.
+- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB; auto. f_equal; apply eqb_prop; auto.
+- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB.
+  f_equal; apply eqb_prop; auto.
+  auto.
+- simpl in NAN; intuition congruence.
+- exploit H; auto. clear H. exploit H0; auto. clear H0. 
+  set (x := B754_finite 53 1024 b0 m0 e1 e2). 
+  set (rx := B2R 53 1024 x).
+  set (y := B754_finite 53 1024 b m e e0).
+  set (ry := B2R 53 1024 y).
+  rewrite (Rplus_comm ry rx). destruct Rlt_bool. 
+  intros (A1 & A2 & A3) (B1 & B2 & B3).
+  apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto. 
+  rewrite Z.add_comm. rewrite Z.min_comm. auto.
+  intros (A1 & A2) (B1 & B2). apply B2FF_inj. rewrite B2 in B1. rewrite <- B1 in A1. auto. 
+Qed.
+
+Theorem mul_commut:
+  forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> mul x y = mul y x.
+Proof.
+  intros x y NAN. unfold mul, b64_mult. 
+  pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y).
+  pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE y x).
+  unfold Bmult in *; destruct x; destruct y; auto.
+- f_equal. apply xorb_comm. 
+- f_equal. apply xorb_comm. 
+- f_equal. apply xorb_comm. 
+- f_equal. apply xorb_comm. 
+- simpl in NAN. intuition congruence.
+- f_equal. apply xorb_comm. 
+- f_equal. apply xorb_comm. 
+- set (x := B754_finite 53 1024 b0 m0 e1 e2) in *. 
+  set (rx := B2R 53 1024 x) in *.
+  set (y := B754_finite 53 1024 b m e e0) in *.
+  set (ry := B2R 53 1024 y) in *.
+  rewrite (Rmult_comm ry rx) in *. destruct Rlt_bool. 
+  destruct H as (A1 & A2 & A3); destruct H0 as (B1 & B2 & B3).
+  apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto. 
+  rewrite ! Bsign_FF2B. f_equal. f_equal. apply xorb_comm. apply Pos.mul_comm. apply Z.add_comm.
+  apply B2FF_inj. etransitivity. eapply H. rewrite xorb_comm. auto. 
+Qed.
+
 (** Properties of comparisons. *)
 
 Theorem order_float_finite_correct:
@@ -1181,7 +1236,7 @@ Proof.
       symmetry. etransitivity. apply H0.
       f_equal. destruct Bsign; reflexivity.
   - destruct f as [[]|[]| |]; try discriminate; try reflexivity.
-    simpl. erewrite binop_propagate1; reflexivity.
+    simpl. destruct (choose_binop_pl b n b n); auto.
 Qed.
 
 Program Definition pow2_float (b:bool) (e:Z) (H:-1023 < e < 1023) : float :=
@@ -1216,9 +1271,7 @@ Proof.
     + apply B2FF_inj.
       etransitivity. apply H0. symmetry. etransitivity. apply H1.
       reflexivity.
-  - destruct f; try discriminate EQFIN. reflexivity.
-    simpl. erewrite binop_propagate2. reflexivity.
-    reflexivity. reflexivity. reflexivity.
+  - destruct f; try discriminate EQFIN; auto. 
   - simpl.
     assert ((4503599627370496 * bpow radix2 (e - 52))%R =
             (/ (4503599627370496 * bpow radix2 (- e - 52)))%R).
@@ -1233,6 +1286,51 @@ Proof.
     destruct b; simpl in H2; omega.
 Qed.
 
+Definition exact_inverse_mantissa := nat_iter 52 xO xH.
+
+Program Definition exact_inverse (f: float) : option float :=
+  match f with
+  | B754_finite s m e B =>
+      if peq m exact_inverse_mantissa then
+      if zlt (-1023) (e + 52) then
+      if zlt (e + 52) 1023 then
+        Some(B754_finite _ _ s m (-e - 104) _)
+      else None else None else None
+  | _ => None
+  end.
+Next Obligation.
+  unfold Fappli_IEEE.bounded, canonic_mantissa. apply andb_true_iff; split.
+  simpl Z.of_nat. apply Zeq_bool_true. unfold FLT_exp. apply Z.max_case_strong; omega.
+  apply Zle_bool_true. omega.  
+Qed.
+
+Remark B754_finite_eq:
+  forall s1 m1 e1 B1 s2 m2 e2 B2,
+  s1 = s2 -> m1 = m2 -> e1 = e2 ->
+  B754_finite _ _ s1 m1 e1 B1 = (B754_finite _ _ s2 m2 e2 B2 : float).
+Proof.
+  intros. subst. f_equal. apply proof_irrelevance. 
+Qed.
+
+Theorem div_mul_inverse:
+  forall x y z, exact_inverse y = Some z -> div x y = mul x z.
+Proof with (try discriminate).
+  unfold exact_inverse; intros. destruct y...
+  destruct (peq m exact_inverse_mantissa)...
+  destruct (zlt (-1023) (e + 52))...
+  destruct (zlt (e + 52) 1023)...
+  inv H.
+  set (n := - e - 52).
+  assert (RNG1: -1023 < n < 1023) by (unfold n; omega).
+  assert (RNG2: -1023 < -n < 1023) by (unfold n; omega).
+  symmetry. 
+  transitivity (mul x (pow2_float b n RNG1)).
+  f_equal. apply B754_finite_eq; auto. unfold n; omega.
+  transitivity (div x (pow2_float b (-n) RNG2)).
+  apply mul_div_pow2. 
+  f_equal. apply B754_finite_eq; auto. unfold n; omega.
+Qed.
+
 Global Opaque
   zero eq_dec neg abs singleoffloat intoffloat intuoffloat floatofint floatofintu
   add sub mul div cmp bits_of_double double_of_bits bits_of_single single_of_bits from_words.
diff --git a/powerpc/ConstpropOp.vp b/powerpc/ConstpropOp.vp
index d0322c1..b755b5e 100644
--- a/powerpc/ConstpropOp.vp
+++ b/powerpc/ConstpropOp.vp
@@ -245,6 +245,11 @@ Definition make_xorimm (n: int) (r: reg) :=
   then (Omove, r :: nil)
   else (Oxorimm n, r :: nil).
 
+Definition make_mulfimm (n: float) (r r1 r2: reg) :=
+  if Float.eq_dec n (Float.floatofint (Int.repr 2))
+  then (Oaddf, r :: r :: nil)
+  else (Omulf, r1 :: r2 :: nil).
+
 Nondetfunction op_strength_reduction 
               (op: operation) (args: list reg) (vl: list approx) :=
   match op, args, vl with
@@ -267,6 +272,8 @@ Nondetfunction op_strength_reduction
   | Oshru, r1 :: r2 :: nil, v1 :: I n2 :: nil => make_shruimm n2 r1 r2
   | Ocmp c, args, vl =>
       let (c', args') := cond_strength_reduction c args vl in (Ocmp c', args')
+  | Omulf, r1 :: r2 :: nil, v1 :: F n2 :: nil => make_mulfimm n2 r1 r1 r2
+  | Omulf, r1 :: r2 :: nil, F n1 :: v2 :: nil => make_mulfimm n1 r2 r1 r2
   | _, _, _ => (op, args)
   end.
 
diff --git a/powerpc/ConstpropOpproof.v b/powerpc/ConstpropOpproof.v
index 9d833bf..b7fad69 100644
--- a/powerpc/ConstpropOpproof.v
+++ b/powerpc/ConstpropOpproof.v
@@ -364,6 +364,33 @@ Proof.
   econstructor; split; eauto. auto. 
 Qed.
 
+Lemma make_mulfimm_correct:
+  forall n r1 r2,
+  rs#r2 = Vfloat n ->
+  let (op, args) := make_mulfimm n r1 r1 r2 in
+  exists v, eval_operation ge sp op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfimm. 
+  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r1); simpl; auto. rewrite Float.mul2_add; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
+Lemma make_mulfimm_correct_2:
+  forall n r1 r2,
+  rs#r1 = Vfloat n ->
+  let (op, args) := make_mulfimm n r2 r1 r2 in
+  exists v, eval_operation ge sp op rs##args m = Some v /\ Val.lessdef (Val.mulf rs#r1 rs#r2) v.
+Proof.
+  intros; unfold make_mulfimm. 
+  destruct (Float.eq_dec n (Float.floatofint (Int.repr 2))); intros. 
+  simpl. econstructor; split. eauto. rewrite H; subst n.
+  destruct (rs#r2); simpl; auto. rewrite Float.mul2_add; auto. 
+  rewrite Float.mul_commut; auto. 
+  simpl. econstructor; split; eauto. 
+Qed.
+
 Lemma op_strength_reduction_correct:
   forall op args vl v,
   vl = approx_regs app args ->
@@ -407,6 +434,11 @@ Proof.
   generalize (cond_strength_reduction_correct c args0 vl0). 
   destruct (cond_strength_reduction c args0 vl0) as [c' args']; intros.
   rewrite <- H1 in H0; auto. econstructor; split; eauto.
+(* mulf *)
+  inv H0. assert (rs#r2 = Vfloat n2). InvApproxRegs; SimplVMA; auto.
+  apply make_mulfimm_correct; auto.
+  inv H0. assert (rs#r1 = Vfloat n1). InvApproxRegs; SimplVMA; auto.
+  apply make_mulfimm_correct_2; auto.
 (* default *)
   exists v; auto.
 Qed.
diff --git a/powerpc/Nan.v b/powerpc/Nan.v
index 77a752e..c230789 100644
--- a/powerpc/Nan.v
+++ b/powerpc/Nan.v
@@ -1,28 +1,26 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*          Jacques-Henri Jourdan, INRIA Paris-Rocquencourt            *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the GNU General Public License as published by  *)
+(*  the Free Software Foundation, either version 2 of the License, or  *)
+(*  (at your option) any later version.  This file is also distributed *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
 Require Import Fappli_IEEE.
 Require Import Fappli_IEEE_bits.
 Require Import Floats.
 Require Import ZArith.
 Require Import Integers.
 
-(* Needed to break a circular reference after extraction *)
-Definition transform_quiet_pl :=
-  Eval unfold Float.transform_quiet_pl in Float.transform_quiet_pl.
-
 Program Definition default_pl : bool * nan_pl 53 := (false, nat_iter 51 xO xH).
 
-Definition binop_pl (pl1 pl2:binary64) : bool*nan_pl 53 :=
-  match pl1, pl2 with
-    | B754_nan s1 pl1, _ => (s1, transform_quiet_pl pl1)
-    | _, B754_nan s2 pl2 => (s2, transform_quiet_pl pl2)
-    | _, _ => default_pl
-  end.
-
-Theorem binop_propagate1: Float.binop_propagate1_prop binop_pl.
-Proof.
-  repeat intro. destruct f1, f2; try discriminate; reflexivity.
-Qed.
-
-Theorem binop_propagate2: Float.binop_propagate2_prop binop_pl.
-Proof.
-  repeat intro. destruct f1, f2, f3; try discriminate; simpl; reflexivity.
-Qed.
+Definition choose_binop_pl (s1: bool) (pl1: nan_pl 53) (s2: bool) (pl2: nan_pl 53) :=
+  false.
diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp
index bc9b677..b0e7c4d 100644
--- a/powerpc/SelectOp.vp
+++ b/powerpc/SelectOp.vp
@@ -334,7 +334,6 @@ Definition absf (e: expr) :=  Eop Oabsf (e ::: Enil).
 Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
 Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
 Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
-Definition divf (e1 e2: expr) :=  Eop Odivf (e1 ::: e2 ::: Enil).
 
 (** ** Comparisons *)
 
diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v
index 177d64a..c0601eb 100644
--- a/powerpc/SelectOpproof.v
+++ b/powerpc/SelectOpproof.v
@@ -600,11 +600,6 @@ Proof.
   red; intros; TrivialExists.
 Qed.
 
-Theorem eval_divf: binary_constructor_sound divf Val.divf.
-Proof.
-  red; intros; TrivialExists.
-Qed.
-
 Section COMP_IMM.
 
 Variable default: comparison -> int -> condition.
-- 
cgit v1.2.3