summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-07-23 08:54:56 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-07-23 08:54:56 +0000
commit2a0168fea37b68ad14e2cb60bf215111e49d4870 (patch)
tree2f59373790d8ce3a5df66ef7a692271cf0666c6c /lib
parent00805153cf9b88aa07cc6694b17d93f5ba2e7de8 (diff)
Merge of "newspilling" branch:
- Support single-precision floats as first-class values - Introduce chunks Many32, Many64 and types Tany32, Tany64 to support saving and restoring registers without knowing the exact types (int/single/float) of their contents, just their sizes. - Memory model: generalize the opaque encoding of pointers to apply to any value, not just pointers, if chunks Many32/Many64 are selected. - More properties of FP arithmetic proved. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2537 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'lib')
-rw-r--r--lib/Camlcoq.ml9
-rw-r--r--lib/Fappli_IEEE_extra.v1506
-rw-r--r--lib/Floats.v2482
3 files changed, 2529 insertions, 1468 deletions
diff --git a/lib/Camlcoq.ml b/lib/Camlcoq.ml
index 8d6fd24..dd89636 100644
--- a/lib/Camlcoq.ml
+++ b/lib/Camlcoq.ml
@@ -320,9 +320,14 @@ let coqstring_of_camlstring s =
(* Floats *)
let coqfloat_of_camlfloat f =
- Float.double_of_bits(coqint_of_camlint64(Int64.bits_of_float f))
+ Float.of_bits(coqint_of_camlint64(Int64.bits_of_float f))
let camlfloat_of_coqfloat f =
- Int64.float_of_bits(camlint64_of_coqint(Float.bits_of_double f))
+ Int64.float_of_bits(camlint64_of_coqint(Float.to_bits f))
+
+let coqfloat32_of_camlfloat f =
+ Float32.of_bits(coqint_of_camlint(Int32.bits_of_float f))
+let camlfloat_of_coqfloat32 f =
+ Int32.float_of_bits(camlint_of_coqint(Float32.to_bits f))
(* Int31 *)
diff --git a/lib/Fappli_IEEE_extra.v b/lib/Fappli_IEEE_extra.v
new file mode 100644
index 0000000..5194a64
--- /dev/null
+++ b/lib/Fappli_IEEE_extra.v
@@ -0,0 +1,1506 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* Jacques-Henri Jourdan, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(** Additional operations and proofs about IEEE-754 binary
+ floating-point numbers, on top of the Flocq library. *)
+
+Require Import Psatz.
+Require Import Bool.
+Require Import Eqdep_dec.
+Require Import Fcore.
+Require Import Fcore_digits.
+Require Import Fcalc_digits.
+Require Import Fcalc_ops.
+Require Import Fcalc_round.
+Require Import Fcalc_bracket.
+Require Import Fprop_Sterbenz.
+Require Import Fappli_IEEE.
+Require Import Fappli_rnd_odd.
+
+Local Open Scope Z_scope.
+
+Section Extra_ops.
+
+(** [prec] is the number of bits of the mantissa including the implicit one.
+ [emax] is the exponent of the infinities.
+ Typically p=24 and emax = 128 in single precision. *)
+
+Variable prec emax : Z.
+Context (prec_gt_0_ : Prec_gt_0 prec).
+Let emin := (3 - emax - prec)%Z.
+Let fexp := FLT_exp emin prec.
+Hypothesis Hmax : (prec < emax)%Z.
+Let binary_float := binary_float prec emax.
+
+(** Remarks on [is_finite] *)
+
+Remark is_finite_not_is_nan:
+ forall (f: binary_float), is_finite _ _ f = true -> is_nan _ _ f = false.
+Proof.
+ destruct f; reflexivity || discriminate.
+Qed.
+
+Remark is_finite_strict_finite:
+ forall (f: binary_float), is_finite_strict _ _ f = true -> is_finite _ _ f = true.
+Proof.
+ destruct f; reflexivity || discriminate.
+Qed.
+
+(** Digression on FP numbers that cannot be [-0.0]. *)
+
+Definition is_finite_pos0 (f: binary_float) : bool :=
+ match f with
+ | B754_zero s => negb s
+ | B754_infinity _ => false
+ | B754_nan _ _ => false
+ | B754_finite _ _ _ _ => true
+ end.
+
+Lemma Bsign_pos0:
+ forall x, is_finite_pos0 x = true -> Bsign _ _ x = Rlt_bool (B2R _ _ x) 0%R.
+Proof.
+ intros. destruct x as [ [] | | | [] ex mx Bx ]; try discriminate; simpl.
+- rewrite Rlt_bool_false; auto. lra.
+- rewrite Rlt_bool_true; auto. apply F2R_lt_0_compat. compute; auto.
+- rewrite Rlt_bool_false; auto.
+ assert ((F2R (Float radix2 (Z.pos ex) mx) > 0)%R) by
+ ( apply F2R_gt_0_compat; compute; auto ).
+ lra.
+Qed.
+
+Theorem B2R_inj_pos0:
+ forall x y,
+ is_finite_pos0 x = true -> is_finite_pos0 y = true ->
+ B2R _ _ x = B2R _ _ y ->
+ x = y.
+Proof.
+ intros. apply B2R_Bsign_inj.
+ destruct x; reflexivity||discriminate.
+ destruct y; reflexivity||discriminate.
+ auto.
+ rewrite ! Bsign_pos0 by auto. rewrite H1; auto.
+Qed.
+
+(** ** Decidable equality *)
+
+Definition Beq_dec: forall (f1 f2: binary_float), {f1 = f2} + {f1 <> f2}.
+Proof.
+ assert (UIP_bool: forall (b1 b2: bool) (e e': b1 = b2), e = e').
+ { intros. apply UIP_dec. decide equality. }
+ Ltac try_not_eq := try solve [right; congruence].
+ destruct f1 as [| |? []|], f2 as [| |? []|];
+ try destruct b; try destruct b0;
+ try solve [left; auto]; try_not_eq.
+ destruct (positive_eq_dec x x0); try_not_eq;
+ subst; left; f_equal; f_equal; apply UIP_bool.
+ destruct (positive_eq_dec x x0); try_not_eq;
+ subst; left; f_equal; f_equal; apply UIP_bool.
+ destruct (positive_eq_dec m m0); try_not_eq;
+ destruct (Z_eq_dec e e1); try solve [right; intro H; inversion H; congruence];
+ subst; left; f_equal; apply UIP_bool.
+ destruct (positive_eq_dec m m0); try_not_eq;
+ destruct (Z_eq_dec e e1); try solve [right; intro H; inversion H; congruence];
+ subst; left; f_equal; apply UIP_bool.
+Defined.
+
+(** ** Comparison *)
+
+(** [Some c] means ordered as per [c]; [None] means unordered. *)
+
+Definition Bcompare (f1 f2: binary_float): option comparison :=
+ match f1, f2 with
+ | B754_nan _ _,_ | _,B754_nan _ _ => None
+ | B754_infinity true, B754_infinity true
+ | B754_infinity false, B754_infinity false => Some Eq
+ | B754_infinity true, _ => Some Lt
+ | B754_infinity false, _ => Some Gt
+ | _, B754_infinity true => Some Gt
+ | _, B754_infinity false => Some Lt
+ | B754_finite true _ _ _, B754_zero _ => Some Lt
+ | B754_finite false _ _ _, B754_zero _ => Some Gt
+ | B754_zero _, B754_finite true _ _ _ => Some Gt
+ | B754_zero _, B754_finite false _ _ _ => Some Lt
+ | B754_zero _, B754_zero _ => Some Eq
+ | B754_finite s1 m1 e1 _, B754_finite s2 m2 e2 _ =>
+ match s1, s2 with
+ | true, false => Some Lt
+ | false, true => Some Gt
+ | false, false =>
+ match Zcompare e1 e2 with
+ | Lt => Some Lt
+ | Gt => Some Gt
+ | Eq => Some (Pcompare m1 m2 Eq)
+ end
+ | true, true =>
+ match Zcompare e1 e2 with
+ | Lt => Some Gt
+ | Gt => Some Lt
+ | Eq => Some (CompOpp (Pcompare m1 m2 Eq))
+ end
+ end
+ end.
+
+Theorem Bcompare_finite_correct:
+ forall f1 f2,
+ is_finite _ _ f1 = true -> is_finite _ _ f2 = true ->
+ Bcompare f1 f2 = Some (Rcompare (B2R _ _ f1) (B2R _ _ f2)).
+Proof.
+ Ltac apply_Rcompare :=
+ match goal with
+ | [ |- Some Lt = Some (Rcompare _ _) ] => f_equal; symmetry; apply Rcompare_Lt
+ | [ |- Some Eq = Some (Rcompare _ _) ] => f_equal; symmetry; apply Rcompare_Eq
+ | [ |- Some Gt = Some (Rcompare _ _) ] => f_equal; symmetry; apply Rcompare_Gt
+ end.
+ unfold Bcompare; intros.
+ destruct f1, f2; try discriminate; unfold B2R, F2R, Fnum, Fexp, cond_Zopp;
+ try (replace 0%R with (Z2R 0 * bpow radix2 e)%R by (simpl Z2R; ring);
+ rewrite Rcompare_mult_r by (apply bpow_gt_0); rewrite Rcompare_Z2R).
+ apply_Rcompare; reflexivity.
+ destruct b0; reflexivity.
+ destruct b; reflexivity.
+ clear H H0.
+ apply andb_prop in e0; destruct e0; apply (canonic_canonic_mantissa _ _ false) in H.
+ apply andb_prop in e2; destruct e2; apply (canonic_canonic_mantissa _ _ false) in H1.
+ pose proof (Zcompare_spec e e1); unfold canonic, Fexp in H1, H.
+ assert (forall m1 m2 e1 e2,
+ let x := (Z2R (Zpos m1) * bpow radix2 e1)%R in
+ let y := (Z2R (Zpos m2) * bpow radix2 e2)%R in
+ (canonic_exp radix2 fexp x < canonic_exp radix2 fexp y)%Z -> (x < y)%R).
+ {
+ intros; apply Rnot_le_lt; intro; apply (ln_beta_le radix2) in H5.
+ unfold canonic_exp in H4. apply (fexp_monotone prec emax) in H5.
+ unfold fexp, emin in H4. omega.
+ apply Rmult_gt_0_compat; [apply (Z2R_lt 0); reflexivity|now apply bpow_gt_0].
+ }
+ assert (forall m1 m2 e1 e2, (Z2R (- Zpos m1) * bpow radix2 e1 < Z2R (Zpos m2) * bpow radix2 e2)%R).
+ {
+ intros; apply (Rlt_trans _ 0%R).
+ replace 0%R with (0*bpow radix2 e0)%R by ring; apply Rmult_lt_compat_r;
+ [apply bpow_gt_0; reflexivity|now apply (Z2R_lt _ 0)].
+ apply Rmult_gt_0_compat; [apply (Z2R_lt 0); reflexivity|now apply bpow_gt_0].
+ }
+ destruct b, b0; try (now apply_Rcompare; apply H5); inversion H3;
+ try (apply_Rcompare; apply H4; rewrite H, H1 in H7; assumption);
+ try (apply_Rcompare; do 2 rewrite Z2R_opp, Ropp_mult_distr_l_reverse;
+ apply Ropp_lt_contravar; apply H4; rewrite H, H1 in H7; assumption);
+ rewrite H7, Rcompare_mult_r, Rcompare_Z2R by (apply bpow_gt_0); reflexivity.
+Qed.
+
+Theorem Bcompare_swap:
+ forall x y,
+ Bcompare y x = match Bcompare x y with Some c => Some (CompOpp c) | None => None end.
+Proof.
+ intros.
+ destruct x as [ ? | [] | ? ? | [] mx ex Bx ];
+ destruct y as [ ? | [] | ? ? | [] my ey By ]; simpl; auto.
+- rewrite <- (Zcompare_antisym ex ey). destruct (ex ?= ey)%Z; auto.
+ simpl. f_equal; f_equal. symmetry. apply Pcompare_antisym.
+- rewrite <- (Zcompare_antisym ex ey). destruct (ex ?= ey)%Z; auto.
+ simpl. f_equal. symmetry. apply Pcompare_antisym.
+Qed.
+
+(** ** Absolute value *)
+
+Definition Babs abs_nan (x: binary_float) : binary_float :=
+ match x with
+ | B754_nan sx plx =>
+ let '(sres, plres) := abs_nan sx plx in B754_nan _ _ sres plres
+ | B754_infinity sx => B754_infinity _ _ false
+ | B754_finite sx mx ex Hx => B754_finite _ _ false mx ex Hx
+ | B754_zero sx => B754_zero _ _ false
+ end.
+
+Theorem B2R_Babs :
+ forall abs_nan x,
+ B2R _ _ (Babs abs_nan x) = Rabs (B2R _ _ x).
+Proof.
+ intros abs_nan [sx|sx|sx plx|sx mx ex Hx]; apply sym_eq ; try apply Rabs_R0.
+ simpl. destruct abs_nan. simpl. apply Rabs_R0.
+ simpl. rewrite <- F2R_abs. destruct sx; auto.
+Qed.
+
+Theorem is_finite_Babs :
+ forall abs_nan x,
+ is_finite _ _ (Babs abs_nan x) = is_finite _ _ x.
+Proof.
+ intros abs_nan [| | |] ; try easy.
+ intros s pl.
+ simpl.
+ now case abs_nan.
+Qed.
+
+Theorem sign_Babs:
+ forall abs_nan x,
+ is_nan _ _ x = false ->
+ Bsign _ _ (Babs abs_nan x) = false.
+Proof.
+ intros abs_nan [| | |]; reflexivity || discriminate.
+Qed.
+
+Theorem Babs_idempotent :
+ forall abs_nan (x: binary_float),
+ is_nan _ _ x = false ->
+ Babs abs_nan (Babs abs_nan x) = Babs abs_nan x.
+Proof.
+ now intros abs_nan [sx|sx|sx plx|sx mx ex Hx] ; auto.
+Qed.
+
+Theorem Babs_opp:
+ forall abs_nan opp_nan x,
+ is_nan _ _ x = false ->
+ Babs abs_nan (Bopp _ _ opp_nan x) = Babs abs_nan x.
+Proof.
+ intros abs_nan opp_nan [| | |]; reflexivity || discriminate.
+Qed.
+
+(** ** Conversion from an integer to a FP number *)
+
+(** Integers that can be represented exactly as FP numbers. *)
+
+Definition integer_representable (n: Z): Prop :=
+ Z.abs n <= 2^emax - 2^(emax - prec) /\ generic_format radix2 fexp (Z2R n).
+
+Let int_upper_bound_eq: 2^emax - 2^(emax - prec) = (2^prec - 1) * 2^(emax - prec).
+Proof.
+ red in prec_gt_0_.
+ ring_simplify. rewrite <- (Zpower_plus radix2) by omega. f_equal. f_equal. omega.
+Qed.
+
+Lemma integer_representable_n2p:
+ forall n p,
+ -2^prec < n < 2^prec -> 0 <= p -> p <= emax - prec ->
+ integer_representable (n * 2^p).
+Proof.
+ intros; split.
+- red in prec_gt_0_. replace (Z.abs (n * 2^p)) with (Z.abs n * 2^p).
+ rewrite int_upper_bound_eq.
+ apply Zmult_le_compat. zify; omega. apply (Zpower_le radix2); omega.
+ zify; omega. apply (Zpower_ge_0 radix2).
+ rewrite Z.abs_mul. f_equal. rewrite Z.abs_eq. auto. apply (Zpower_ge_0 radix2).
+- apply generic_format_FLT. exists (Float radix2 n p).
+ unfold F2R; simpl.
+ split. rewrite <- Z2R_Zpower by auto. apply Z2R_mult.
+ split. zify; omega.
+ unfold emin; red in prec_gt_0_; omega.
+Qed.
+
+Lemma integer_representable_2p:
+ forall p,
+ 0 <= p <= emax - 1 ->
+ integer_representable (2^p).
+Proof.
+ intros; split.
+- red in prec_gt_0_.
+ rewrite Z.abs_eq by (apply (Zpower_ge_0 radix2)).
+ apply Zle_trans with (2^(emax-1)).
+ apply (Zpower_le radix2); omega.
+ assert (2^emax = 2^(emax-1)*2).
+ { change 2 with (2^1) at 3. rewrite <- (Zpower_plus radix2) by omega.
+ f_equal. omega. }
+ assert (2^(emax - prec) <= 2^(emax - 1)).
+ { apply (Zpower_le radix2). omega. }
+ omega.
+- red in prec_gt_0_.
+ apply generic_format_FLT. exists (Float radix2 1 p).
+ unfold F2R; simpl.
+ split. rewrite Rmult_1_l. rewrite <- Z2R_Zpower. auto. omega.
+ split. change 1 with (2^0). apply (Zpower_lt radix2). omega. auto.
+ unfold emin; omega.
+Qed.
+
+Lemma integer_representable_opp:
+ forall n, integer_representable n -> integer_representable (-n).
+Proof.
+ intros n (A & B); split. rewrite Z.abs_opp. auto.
+ rewrite Z2R_opp. apply generic_format_opp; auto.
+Qed.
+
+Lemma integer_representable_n2p_wide:
+ forall n p,
+ -2^prec <= n <= 2^prec -> 0 <= p -> p < emax - prec ->
+ integer_representable (n * 2^p).
+Proof.
+ intros. red in prec_gt_0_.
+ destruct (Z.eq_dec n (2^prec)); [idtac | destruct (Z.eq_dec n (-2^prec))].
+- rewrite e. rewrite <- (Zpower_plus radix2) by omega.
+ apply integer_representable_2p. omega.
+- rewrite e. rewrite <- Zopp_mult_distr_l. apply integer_representable_opp.
+ rewrite <- (Zpower_plus radix2) by omega.
+ apply integer_representable_2p. omega.
+- apply integer_representable_n2p; omega.
+Qed.
+
+Lemma integer_representable_n:
+ forall n, -2^prec <= n <= 2^prec -> integer_representable n.
+Proof.
+ red in prec_gt_0_. intros.
+ replace n with (n * 2^0) by (change (2^0) with 1; ring).
+ apply integer_representable_n2p_wide. auto. omega. omega.
+Qed.
+
+Lemma round_int_no_overflow:
+ forall n,
+ Z.abs n <= 2^emax - 2^(emax-prec) ->
+ (Rabs (round radix2 fexp (round_mode mode_NE) (Z2R n)) < bpow radix2 emax)%R.
+Proof.
+ intros. red in prec_gt_0_.
+ rewrite <- round_NE_abs.
+ apply Rle_lt_trans with (Z2R (2^emax - 2^(emax-prec))).
+ apply round_le_generic. apply fexp_correct; auto. apply valid_rnd_N.
+ apply generic_format_FLT. exists (Float radix2 (2^prec-1) (emax-prec)).
+ rewrite int_upper_bound_eq. unfold F2R; simpl.
+ split. rewrite <- Z2R_Zpower by omega. rewrite <- Z2R_mult. auto.
+ split. assert (0 < 2^prec) by (apply (Zpower_gt_0 radix2); omega). zify; omega.
+ unfold emin; omega.
+ rewrite <- Z2R_abs. apply Z2R_le. auto.
+ rewrite <- Z2R_Zpower by omega. apply Z2R_lt. simpl.
+ assert (0 < 2^(emax-prec)) by (apply (Zpower_gt_0 radix2); omega).
+ omega.
+ apply fexp_correct. auto.
+Qed.
+
+(** Conversion from an integer. Round to nearest. *)
+
+Definition BofZ (n: Z) : binary_float :=
+ binary_normalize prec emax prec_gt_0_ Hmax mode_NE n 0 false.
+
+Theorem BofZ_correct:
+ forall n,
+ if Rlt_bool (Rabs (round radix2 fexp (round_mode mode_NE) (Z2R n))) (bpow radix2 emax)
+ then
+ B2R prec emax (BofZ n) = round radix2 fexp (round_mode mode_NE) (Z2R n) /\
+ is_finite _ _ (BofZ n) = true /\
+ Bsign prec emax (BofZ n) = Zlt_bool n 0
+ else
+ B2FF prec emax (BofZ n) = binary_overflow prec emax mode_NE (Zlt_bool n 0).
+Proof.
+ intros.
+ generalize (binary_normalize_correct prec emax prec_gt_0_ Hmax mode_NE n 0 false).
+ fold emin; fold fexp; fold (BofZ n).
+ replace (F2R {| Fnum := n; Fexp := 0 |}) with (Z2R n).
+ destruct Rlt_bool.
+- intros (A & B & C). split; [|split].
+ + auto.
+ + auto.
+ + rewrite C. change 0%R with (Z2R 0). rewrite Rcompare_Z2R.
+ unfold Zlt_bool. auto.
+- intros A; rewrite A. f_equal. change 0%R with (Z2R 0).
+ generalize (Zlt_bool_spec n 0); intros SPEC; inversion SPEC.
+ apply Rlt_bool_true; apply Z2R_lt; auto.
+ apply Rlt_bool_false; apply Z2R_le; auto.
+- unfold F2R; simpl. ring.
+Qed.
+
+Theorem BofZ_finite:
+ forall n,
+ Z.abs n <= 2^emax - 2^(emax-prec) ->
+ B2R _ _ (BofZ n) = round radix2 fexp (round_mode mode_NE) (Z2R n)
+ /\ is_finite _ _ (BofZ n) = true
+ /\ Bsign _ _ (BofZ n) = Zlt_bool n 0%Z.
+Proof.
+ intros.
+ generalize (BofZ_correct n). rewrite Rlt_bool_true. auto.
+ apply round_int_no_overflow; auto.
+Qed.
+
+Theorem BofZ_representable:
+ forall n,
+ integer_representable n ->
+ B2R _ _ (BofZ n) = Z2R n
+ /\ is_finite _ _ (BofZ n) = true
+ /\ Bsign _ _ (BofZ n) = (n <? 0).
+Proof.
+ intros. destruct H as (P & Q). destruct (BofZ_finite n) as (A & B & C). auto.
+ intuition. rewrite A. apply round_generic. apply valid_rnd_round_mode. auto.
+Qed.
+
+Theorem BofZ_exact:
+ forall n,
+ -2^prec <= n <= 2^prec ->
+ B2R _ _ (BofZ n) = Z2R n
+ /\ is_finite _ _ (BofZ n) = true
+ /\ Bsign _ _ (BofZ n) = Zlt_bool n 0%Z.
+Proof.
+ intros. apply BofZ_representable. apply integer_representable_n; auto.
+Qed.
+
+Lemma BofZ_finite_pos0:
+ forall n,
+ Z.abs n <= 2^emax - 2^(emax-prec) -> is_finite_pos0 (BofZ n) = true.
+Proof.
+ intros.
+ generalize (binary_normalize_correct prec emax prec_gt_0_ Hmax mode_NE n 0 false).
+ fold emin; fold fexp; fold (BofZ n).
+ replace (F2R {| Fnum := n; Fexp := 0 |}) with (Z2R n) by
+ (unfold F2R; simpl; ring).
+ rewrite Rlt_bool_true by (apply round_int_no_overflow; auto).
+ intros (A & B & C).
+ destruct (BofZ n); auto; try discriminate.
+ simpl in *. rewrite C. change 0%R with (Z2R 0). rewrite Rcompare_Z2R.
+ generalize (Zcompare_spec n 0); intros SPEC; inversion SPEC; auto.
+ assert ((round radix2 fexp ZnearestE (Z2R n) <= -1)%R).
+ { change (-1)%R with (Z2R (-1)).
+ apply round_le_generic. apply fexp_correct. auto. apply valid_rnd_N.
+ apply (integer_representable_opp 1).
+ apply (integer_representable_2p 0).
+ red in prec_gt_0_; omega.
+ apply Z2R_le; omega.
+ }
+ lra.
+Qed.
+
+Lemma BofZ_finite_equal:
+ forall x y,
+ Z.abs x <= 2^emax - 2^(emax-prec) ->
+ Z.abs y <= 2^emax - 2^(emax-prec) ->
+ B2R _ _ (BofZ x) = B2R _ _ (BofZ y) ->
+ BofZ x = BofZ y.
+Proof.
+ intros. apply B2R_inj_pos0; auto; apply BofZ_finite_pos0; auto.
+Qed.
+
+(** Commutation properties with addition, subtraction, multiplication. *)
+
+Theorem BofZ_plus:
+ forall nan p q,
+ integer_representable p -> integer_representable q ->
+ Bplus _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) = BofZ (p + q).
+Proof.
+ intros.
+ destruct (BofZ_representable p) as (A & B & C); auto.
+ destruct (BofZ_representable q) as (D & E & F); auto.
+ generalize (Bplus_correct _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) B E).
+ fold emin; fold fexp.
+ rewrite A, D. rewrite <- Z2R_plus.
+ generalize (BofZ_correct (p + q)). destruct Rlt_bool.
+- intros (P & Q & R) (U & V & W).
+ apply B2R_Bsign_inj; auto.
+ rewrite P, U; auto.
+ rewrite R, W, C, F.
+ change 0%R with (Z2R 0). rewrite Rcompare_Z2R. unfold Zlt_bool at 3.
+ generalize (Zcompare_spec (p + q) 0); intros SPEC; inversion SPEC; auto.
+ assert (EITHER: 0 <= p \/ 0 <= q) by omega.
+ destruct EITHER; [apply andb_false_intro1 | apply andb_false_intro2];
+ apply Zlt_bool_false; auto.
+- intros P (U & V).
+ apply B2FF_inj.
+ rewrite P, U, C. f_equal. rewrite C, F in V.
+ generalize (Zlt_bool_spec p 0) (Zlt_bool_spec q 0). rewrite <- V.
+ intros SPEC1 SPEC2; inversion SPEC1; inversion SPEC2; try congruence; symmetry.
+ apply Zlt_bool_true; omega.
+ apply Zlt_bool_false; omega.
+Qed.
+
+Theorem BofZ_minus:
+ forall nan p q,
+ integer_representable p -> integer_representable q ->
+ Bminus _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) = BofZ (p - q).
+Proof.
+ intros.
+ destruct (BofZ_representable p) as (A & B & C); auto.
+ destruct (BofZ_representable q) as (D & E & F); auto.
+ generalize (Bminus_correct _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) B E).
+ fold emin; fold fexp.
+ rewrite A, D. rewrite <- Z2R_minus.
+ generalize (BofZ_correct (p - q)). destruct Rlt_bool.
+- intros (P & Q & R) (U & V & W).
+ apply B2R_Bsign_inj; auto.
+ rewrite P, U; auto.
+ rewrite R, W, C, F.
+ change 0%R with (Z2R 0). rewrite Rcompare_Z2R. unfold Zlt_bool at 3.
+ generalize (Zcompare_spec (p - q) 0); intros SPEC; inversion SPEC; auto.
+ assert (EITHER: 0 <= p \/ q < 0) by omega.
+ destruct EITHER; [apply andb_false_intro1 | apply andb_false_intro2].
+ rewrite Zlt_bool_false; auto.
+ rewrite Zlt_bool_true; auto.
+- intros P (U & V).
+ apply B2FF_inj.
+ rewrite P, U, C. f_equal. rewrite C, F in V.
+ generalize (Zlt_bool_spec p 0) (Zlt_bool_spec q 0). rewrite V.
+ intros SPEC1 SPEC2; inversion SPEC1; inversion SPEC2; symmetry.
+ rewrite <- H3 in H1; discriminate.
+ apply Zlt_bool_true; omega.
+ apply Zlt_bool_false; omega.
+ rewrite <- H3 in H1; discriminate.
+Qed.
+
+Theorem BofZ_mult:
+ forall nan p q,
+ integer_representable p -> integer_representable q ->
+ 0 < q ->
+ Bmult _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q) = BofZ (p * q).
+Proof.
+ intros.
+ assert (SIGN: xorb (p <? 0) (q <? 0) = (p * q <? 0)).
+ {
+ rewrite (Zlt_bool_false q) by omega.
+ generalize (Zlt_bool_spec p 0); intros SPEC; inversion SPEC; simpl; symmetry.
+ apply Zlt_bool_true. rewrite Z.mul_comm. apply Z.mul_pos_neg; omega.
+ apply Zlt_bool_false. apply Zsame_sign_imp; omega.
+ }
+ destruct (BofZ_representable p) as (A & B & C); auto.
+ destruct (BofZ_representable q) as (D & E & F); auto.
+ generalize (Bmult_correct _ _ _ Hmax nan mode_NE (BofZ p) (BofZ q)).
+ fold emin; fold fexp.
+ rewrite A, B, C, D, E, F. rewrite <- Z2R_mult.
+ generalize (BofZ_correct (p * q)). destruct Rlt_bool.
+- intros (P & Q & R) (U & V & W).
+ apply B2R_Bsign_inj; auto.
+ rewrite P, U; auto.
+ rewrite R, W; auto.
+ apply is_finite_not_is_nan; auto.
+- intros P U.
+ apply B2FF_inj. rewrite P, U. f_equal. auto.
+Qed.
+
+Theorem BofZ_mult_2p:
+ forall nan x p,
+ Z.abs x <= 2^emax - 2^(emax-prec) ->
+ 2^prec <= Z.abs x ->
+ 0 <= p <= emax - 1 ->
+ Bmult _ _ _ Hmax nan mode_NE (BofZ x) (BofZ (2^p)) = BofZ (x * 2^p).
+Proof.
+ intros.
+ destruct (Z.eq_dec x 0).
+- subst x. apply BofZ_mult.
+ apply integer_representable_n.
+ generalize (Zpower_ge_0 radix2 prec). simpl; omega.
+ apply integer_representable_2p. auto.
+ apply (Zpower_gt_0 radix2).
+ omega.
+- assert (Z2R x <> 0%R) by (apply (Z2R_neq _ _ n)).
+ destruct (BofZ_finite x H) as (A & B & C).
+ destruct (BofZ_representable (2^p)) as (D & E & F).
+ apply integer_representable_2p. auto.
+ assert (canonic_exp radix2 fexp (Z2R (x * 2^p)) =
+ canonic_exp radix2 fexp (Z2R x) + p).
+ {
+ unfold canonic_exp, fexp. rewrite Z2R_mult.
+ change (2^p) with (radix2^p). rewrite Z2R_Zpower by omega.
+ rewrite ln_beta_mult_bpow by auto.
+ assert (prec + 1 <= ln_beta radix2 (Z2R x)).
+ { rewrite <- (ln_beta_abs radix2 (Z2R x)).
+ rewrite <- (ln_beta_bpow radix2 prec).
+ apply ln_beta_le.
+ apply bpow_gt_0. rewrite <- Z2R_Zpower by (red in prec_gt_0_;omega).
+ rewrite <- Z2R_abs. apply Z2R_le; auto. }
+ unfold FLT_exp.
+ unfold emin; red in prec_gt_0_; zify; omega.
+ }
+ assert (forall m, round radix2 fexp m (Z2R x) * Z2R (2^p) =
+ round radix2 fexp m (Z2R (x * 2^p)))%R.
+ {
+ intros. unfold round, scaled_mantissa. rewrite H3.
+ rewrite Z2R_mult. rewrite Z.opp_add_distr. rewrite bpow_plus.
+ set (a := Z2R x); set (b := bpow radix2 (- canonic_exp radix2 fexp a)).
+ replace (a * Z2R (2^p) * (b * bpow radix2 (-p)))%R with (a * b)%R.
+ unfold F2R; simpl. rewrite Rmult_assoc. f_equal.
+ rewrite bpow_plus. f_equal. apply (Z2R_Zpower radix2). omega.
+ transitivity ((a * b) * (Z2R (2^p) * bpow radix2 (-p)))%R.
+ rewrite (Z2R_Zpower radix2). rewrite <- bpow_plus.
+ replace (p + -p) with 0 by omega. change (bpow radix2 0) with 1%R. ring.
+ omega.
+ ring.
+ }
+ assert (forall m x,
+ round radix2 fexp (round_mode m) (round radix2 fexp (round_mode m) x) =
+ round radix2 fexp (round_mode m) x).
+ {
+ intros. apply round_generic. apply valid_rnd_round_mode.
+ apply generic_format_round. apply fexp_correct; auto.
+ apply valid_rnd_round_mode.
+ }
+ assert (xorb (x <? 0) (2^p <? 0) = (x * 2^p <? 0)).
+ {
+ assert (0 < 2^p) by (apply (Zpower_gt_0 radix2); omega).
+ rewrite (Zlt_bool_false (2^p)) by omega. rewrite xorb_false_r.
+ symmetry. generalize (Zlt_bool_spec x 0); intros SPEC; inversion SPEC.
+ apply Zlt_bool_true. apply Z.mul_neg_pos; auto.
+ apply Zlt_bool_false. apply Z.mul_nonneg_nonneg; omega.
+ }
+ generalize (Bmult_correct _ _ _ Hmax nan mode_NE (BofZ x) (BofZ (2^p)))
+ (BofZ_correct (x * 2^p)).
+ fold emin; fold fexp. rewrite A, B, C, D, E, F, H4, H5.
+ destruct Rlt_bool.
++ intros (P & Q & R) (U & V & W).
+ apply B2R_Bsign_inj; auto.
+ rewrite P, U. auto.
+ rewrite R, W. auto.
+ apply is_finite_not_is_nan; auto.
++ intros P U.
+ apply B2FF_inj. rewrite P, U. f_equal; auto.
+Qed.
+
+(** Rounding to odd the argument of [BofZ]. *)
+
+Lemma round_odd_flt:
+ forall prec' emin' x choice,
+ prec > 1 -> prec' > 1 -> prec' >= prec + 2 -> emin' <= emin - 2 ->
+ round radix2 fexp (Znearest choice) (round radix2 (FLT_exp emin' prec') Zrnd_odd x) =
+ round radix2 fexp (Znearest choice) x.
+Proof.
+ intros. apply round_odd_prop. auto. apply fexp_correct; auto.
+ apply exists_NE_FLT. right; omega.
+ apply FLT_exp_valid. red; omega.
+ apply exists_NE_FLT. right; omega.
+ unfold fexp, FLT_exp; intros. zify; omega.
+Qed.
+
+Corollary round_odd_fix:
+ forall x p choice,
+ prec > 1 ->
+ 0 <= p ->
+ (bpow radix2 (prec + p + 1) <= Rabs x)%R ->
+ round radix2 fexp (Znearest choice) (round radix2 (FIX_exp p) Zrnd_odd x) =
+ round radix2 fexp (Znearest choice) x.
+Proof.
+ intros. destruct (Req_EM_T x 0%R).
+- subst x. rewrite round_0. auto. apply valid_rnd_odd.
+- set (prec' := ln_beta radix2 x - p).
+ set (emin' := emin - 2).
+ assert (PREC: ln_beta radix2 (bpow radix2 (prec + p + 1)) <= ln_beta radix2 x).
+ { rewrite <- (ln_beta_abs radix2 x).
+ apply ln_beta_le; auto. apply bpow_gt_0. }
+ rewrite ln_beta_bpow in PREC.
+ assert (CANON: canonic_exp radix2 (FLT_exp emin' prec') x =
+ canonic_exp radix2 (FIX_exp p) x).
+ {
+ unfold canonic_exp, FLT_exp, FIX_exp.
+ replace (ln_beta radix2 x - prec') with p by (unfold prec'; omega).
+ apply Z.max_l. unfold emin', emin. red in prec_gt_0_; omega.
+ }
+ assert (RND: round radix2 (FIX_exp p) Zrnd_odd x =
+ round radix2 (FLT_exp emin' prec') Zrnd_odd x).
+ {
+ unfold round, scaled_mantissa. rewrite CANON. auto.
+ }
+ rewrite RND.
+ apply round_odd_flt. auto.
+ unfold prec'. red in prec_gt_0_; omega.
+ unfold prec'. omega.
+ unfold emin'. omega.
+Qed.
+
+Definition int_round_odd (x: Z) (p: Z) :=
+ (if Z.eqb (x mod 2^p) 0 || Z.odd (x / 2^p) then x / 2^p else x / 2^p + 1) * 2^p.
+
+Lemma Zrnd_odd_int:
+ forall n p, 0 <= p ->
+ Zrnd_odd (Z2R n * bpow radix2 (-p)) * 2^p =
+ int_round_odd n p.
+Proof.
+ intros.
+ assert (0 < 2^p) by (apply (Zpower_gt_0 radix2); omega).
+ assert (n = (n / 2^p) * 2^p + n mod 2^p) by (rewrite Zmult_comm; apply Z.div_mod; omega).
+ assert (0 <= n mod 2^p < 2^p) by (apply Z_mod_lt; omega).
+ unfold int_round_odd. set (q := n / 2^p) in *; set (r := n mod 2^p) in *.
+ f_equal.
+ pose proof (bpow_gt_0 radix2 (-p)).
+ assert (bpow radix2 p * bpow radix2 (-p) = 1)%R.
+ { rewrite <- bpow_plus. replace (p + -p) with 0 by omega. auto. }
+ assert (Z2R n * bpow radix2 (-p) = Z2R q + Z2R r * bpow radix2 (-p))%R.
+ { rewrite H1. rewrite Z2R_plus, Z2R_mult.
+ change (Z2R (2^p)) with (Z2R (radix2^p)).
+ rewrite Z2R_Zpower by omega. ring_simplify.
+ rewrite Rmult_assoc. rewrite H4. ring. }
+ assert (0 <= Z2R r < bpow radix2 p)%R.
+ { split. change 0%R with (Z2R 0). apply Z2R_le; omega.
+ rewrite <- Z2R_Zpower by omega. apply Z2R_lt; tauto. }
+ assert (0 <= Z2R r * bpow radix2 (-p) < 1)%R.
+ { generalize (bpow_gt_0 radix2 (-p)). intros.
+ split. apply Rmult_le_pos; lra.
+ rewrite <- H4. apply Rmult_lt_compat_r. auto. tauto. }
+ assert (Zfloor (Z2R n * bpow radix2 (-p)) = q).
+ { apply Zfloor_imp. rewrite H5. rewrite Z2R_plus. change (Z2R 1) with 1%R. lra. }
+ unfold Zrnd_odd. destruct Req_EM_T.
+- assert (Z2R r * bpow radix2 (-p) = 0)%R.
+ { rewrite H8 in e. rewrite e in H5. lra. }
+ apply Rmult_integral in H9. destruct H9; [ | lra ].
+ apply (eq_Z2R r 0) in H9. apply <- Z.eqb_eq in H9. rewrite H9. assumption.
+- assert (Z2R r * bpow radix2 (-p) <> 0)%R.
+ { rewrite H8 in n0. lra. }
+ destruct (Z.eqb r 0) eqn:RZ.
+ apply Z.eqb_eq in RZ. rewrite RZ in H9. change (Z2R 0) with 0%R in H9.
+ rewrite Rmult_0_l in H9. congruence.
+ rewrite Zceil_floor_neq by lra. rewrite H8.
+ change Zeven with Z.even. rewrite Zodd_even_bool. destruct (Z.even q); auto.
+Qed.
+
+Lemma int_round_odd_le:
+ forall p x y, 0 <= p ->
+ x <= y -> int_round_odd x p <= int_round_odd y p.
+Proof.
+ intros.
+ assert (Zrnd_odd (Z2R x * bpow radix2 (-p)) <= Zrnd_odd (Z2R y * bpow radix2 (-p))).
+ { apply Zrnd_le. apply valid_rnd_odd. apply Rmult_le_compat_r. apply bpow_ge_0.
+ apply Z2R_le; auto. }
+ rewrite <- ! Zrnd_odd_int by auto.
+ apply Zmult_le_compat_r. auto. apply (Zpower_ge_0 radix2).
+Qed.
+
+Lemma int_round_odd_exact:
+ forall p x, 0 <= p ->
+ (2^p | x) -> int_round_odd x p = x.
+Proof.
+ intros. unfold int_round_odd. apply Znumtheory.Zdivide_mod in H0.
+ rewrite H0. simpl. rewrite Zmult_comm. symmetry. apply Z_div_exact_2.
+ apply Zlt_gt. apply (Zpower_gt_0 radix2). auto. auto.
+Qed.
+
+Theorem BofZ_round_odd:
+ forall x p,
+ prec > 1 ->
+ Z.abs x <= 2^emax - 2^(emax-prec) ->
+ 0 <= p <= emax - prec ->
+ 2^(prec + p + 1) <= Z.abs x ->
+ BofZ x = BofZ (int_round_odd x p).
+Proof.
+ intros x p PREC XRANGE PRANGE XGE.
+ assert (DIV: (2^p | 2^emax - 2^(emax - prec))).
+ { rewrite int_upper_bound_eq. apply Z.divide_mul_r.
+ exists (2^(emax - prec - p)). red in prec_gt_0_.
+ rewrite <- (Zpower_plus radix2) by omega. f_equal; omega. }
+ assert (YRANGE: Z.abs (int_round_odd x p) <= 2^emax - 2^(emax-prec)).
+ { apply Z.abs_le. split.
+ replace (-(2^emax - 2^(emax-prec))) with (int_round_odd (-(2^emax - 2^(emax-prec))) p).
+ apply int_round_odd_le; zify; omega.
+ apply int_round_odd_exact. omega. apply Z.divide_opp_r. auto.
+ replace (2^emax - 2^(emax-prec)) with (int_round_odd (2^emax - 2^(emax-prec)) p).
+ apply int_round_odd_le; zify; omega.
+ apply int_round_odd_exact. omega. auto. }
+ destruct (BofZ_finite x XRANGE) as (X1 & X2 & X3).
+ destruct (BofZ_finite (int_round_odd x p) YRANGE) as (Y1 & Y2 & Y3).
+ apply BofZ_finite_equal; auto.
+ rewrite X1, Y1.
+ assert (Z2R (int_round_odd x p) = round radix2 (FIX_exp p) Zrnd_odd (Z2R x)).
+ {
+ unfold round, scaled_mantissa, canonic_exp, FIX_exp.
+ rewrite <- Zrnd_odd_int by omega.
+ unfold F2R; simpl. rewrite Z2R_mult. f_equal. apply (Z2R_Zpower radix2). omega.
+ }
+ rewrite H. symmetry. apply round_odd_fix. auto. omega.
+ rewrite <- Z2R_Zpower. rewrite <- Z2R_abs. apply Z2R_le; auto.
+ red in prec_gt_0_; omega.
+Qed.
+
+Lemma int_round_odd_shifts:
+ forall x p, 0 <= p ->
+ int_round_odd x p =
+ Z.shiftl (if Z.eqb (x mod 2^p) 0 then Z.shiftr x p else Z.lor (Z.shiftr x p) 1) p.
+Proof.
+ intros.
+ unfold int_round_odd. rewrite Z.shiftl_mul_pow2 by auto. f_equal.
+ rewrite Z.shiftr_div_pow2 by auto.
+ destruct (x mod 2^p =? 0) eqn:E. auto.
+ assert (forall n, (if Z.odd n then n else n + 1) = Z.lor n 1).
+ { destruct n; simpl; auto.
+ destruct p0; auto.
+ destruct p0; auto. induction p0; auto. }
+ simpl. apply H0.
+Qed.
+
+Lemma int_round_odd_bits:
+ forall x y p, 0 <= p ->
+ (forall i, 0 <= i < p -> Z.testbit y i = false) ->
+ Z.testbit y p = (if Z.eqb (x mod 2^p) 0 then Z.testbit x p else true) ->
+ (forall i, p < i -> Z.testbit y i = Z.testbit x i) ->
+ int_round_odd x p = y.
+Proof.
+ intros until p; intros PPOS BELOW AT ABOVE.
+ rewrite int_round_odd_shifts by auto.
+ apply Z.bits_inj'. intros.
+ generalize (Zcompare_spec n p); intros SPEC; inversion SPEC.
+- rewrite BELOW by auto. apply Z.shiftl_spec_low; auto.
+- subst n. rewrite AT. rewrite Z.shiftl_spec_high by omega.
+ replace (p - p) with 0 by omega.
+ destruct (x mod 2^p =? 0).
+ + rewrite Z.shiftr_spec by omega. f_equal; omega.
+ + rewrite Z.lor_spec. apply orb_true_r.
+- rewrite ABOVE by auto. rewrite Z.shiftl_spec_high by omega.
+ destruct (x mod 2^p =? 0).
+ rewrite Z.shiftr_spec by omega. f_equal; omega.
+ rewrite Z.lor_spec, Z.shiftr_spec by omega.
+ change 1 with (Z.ones 1). rewrite Z.ones_spec_high by omega. rewrite orb_false_r.
+ f_equal; omega.
+Qed.
+
+(** ** Conversion from a FP number to an integer *)
+
+(** Always rounds toward zero. *)
+
+Definition ZofB (f: binary_float): option Z :=
+ match f with
+ | B754_finite s m (Zpos e) _ => Some (cond_Zopp s (Zpos m) * Zpower_pos radix2 e)%Z
+ | B754_finite s m 0 _ => Some (cond_Zopp s (Zpos m))
+ | B754_finite s m (Zneg e) _ => Some (cond_Zopp s (Zpos m / Zpower_pos radix2 e))%Z
+ | B754_zero _ => Some 0%Z
+ | _ => None
+ end.
+
+Theorem ZofB_correct:
+ forall f,
+ ZofB f = if is_finite _ _ f then Some (Ztrunc (B2R _ _ f)) else None.
+Proof.
+ destruct f; simpl; auto.
+- f_equal. symmetry. apply (Ztrunc_Z2R 0).
+- destruct e; f_equal.
+ + unfold F2R; simpl. rewrite Rmult_1_r. rewrite Ztrunc_Z2R. auto.
+ + unfold F2R; simpl. rewrite <- Z2R_mult. rewrite Ztrunc_Z2R. auto.
+ + unfold F2R; simpl. rewrite Z2R_cond_Zopp. rewrite <- cond_Ropp_mult_l.
+ assert (EQ: forall x, Ztrunc (cond_Ropp b x) = cond_Zopp b (Ztrunc x)).
+ {
+ intros. destruct b; simpl; auto. apply Ztrunc_opp.
+ }
+ rewrite EQ. f_equal.
+ generalize (Zpower_pos_gt_0 2 p (refl_equal _)); intros.
+ rewrite Ztrunc_floor. symmetry. apply Zfloor_div. omega.
+ apply Rmult_le_pos. apply (Z2R_le 0). compute; congruence.
+ apply Rlt_le. apply Rinv_0_lt_compat. apply (Z2R_lt 0). auto.
+Qed.
+
+(** Interval properties. *)
+
+Remark Ztrunc_range_pos:
+ forall x, 0 < Ztrunc x -> (Z2R (Ztrunc x) <= x < Z2R (Ztrunc x + 1)%Z)%R.
+Proof.
+ intros.
+ rewrite Ztrunc_floor. split. apply Zfloor_lb. rewrite Z2R_plus. apply Zfloor_ub.
+ generalize (Rle_bool_spec 0%R x). intros RLE; inversion RLE; subst; clear RLE.
+ auto.
+ rewrite Ztrunc_ceil in H by lra. unfold Zceil in H.
+ assert (-x < 0)%R.
+ { apply Rlt_le_trans with (Z2R (Zfloor (-x)) + 1)%R. apply Zfloor_ub.
+ change 0%R with (Z2R 0). change 1%R with (Z2R 1). rewrite <- Z2R_plus.
+ apply Z2R_le. omega. }
+ lra.
+Qed.
+
+Remark Ztrunc_range_zero:
+ forall x, Ztrunc x = 0 -> (-1 < x < 1)%R.
+Proof.
+ intros; generalize (Rle_bool_spec 0%R x). intros RLE; inversion RLE; subst; clear RLE.
+- rewrite Ztrunc_floor in H by auto. split.
+ + apply Rlt_le_trans with 0%R; auto. rewrite <- Ropp_0. apply Ropp_lt_contravar. apply Rlt_0_1.
+ + replace 1%R with (Z2R (Zfloor x) + 1)%R. apply Zfloor_ub. rewrite H. simpl. apply Rplus_0_l.
+- rewrite Ztrunc_ceil in H by (apply Rlt_le; auto). split.
+ + apply Ropp_lt_cancel. rewrite Ropp_involutive.
+ replace 1%R with (Z2R (Zfloor (-x)) + 1)%R. apply Zfloor_ub.
+ unfold Zceil in H. replace (Zfloor (-x)) with 0 by omega. simpl. apply Rplus_0_l.
+ + apply Rlt_le_trans with 0%R; auto. apply Rle_0_1.
+Qed.
+
+Theorem ZofB_range_pos:
+ forall f n, ZofB f = Some n -> 0 < n -> (Z2R n <= B2R _ _ f < Z2R (n + 1)%Z)%R.
+Proof.
+ intros. rewrite ZofB_correct in H. destruct (is_finite prec emax f) eqn:FIN; inversion H.
+ apply Ztrunc_range_pos. congruence.
+Qed.
+
+Theorem ZofB_range_neg:
+ forall f n, ZofB f = Some n -> n < 0 -> (Z2R (n - 1)%Z < B2R _ _ f <= Z2R n)%R.
+Proof.
+ intros. rewrite ZofB_correct in H. destruct (is_finite prec emax f) eqn:FIN; inversion H.
+ set (x := B2R prec emax f) in *. set (y := (-x)%R).
+ assert (A: (Z2R (Ztrunc y) <= y < Z2R (Ztrunc y + 1)%Z)%R).
+ { apply Ztrunc_range_pos. unfold y. rewrite Ztrunc_opp. omega. }
+ destruct A as [B C].
+ unfold y in B, C. rewrite Ztrunc_opp in B, C.
+ replace (- Ztrunc x + 1) with (- (Ztrunc x - 1)) in C by omega.
+ rewrite Z2R_opp in B, C. lra.
+Qed.
+
+Theorem ZofB_range_zero:
+ forall f, ZofB f = Some 0 -> (-1 < B2R _ _ f < 1)%R.
+Proof.
+ intros. rewrite ZofB_correct in H. destruct (is_finite prec emax f) eqn:FIN; inversion H.
+ apply Ztrunc_range_zero. auto.
+Qed.
+
+Theorem ZofB_range_nonneg:
+ forall f n, ZofB f = Some n -> 0 <= n -> (-1 < B2R _ _ f < Z2R (n + 1)%Z)%R.
+Proof.
+ intros. destruct (Z.eq_dec n 0).
+- subst n. apply ZofB_range_zero. auto.
+- destruct (ZofB_range_pos f n) as (A & B). auto. omega.
+ split; auto. apply Rlt_le_trans with (Z2R 0). simpl; lra.
+ apply Rle_trans with (Z2R n); auto. apply Z2R_le; auto.
+Qed.
+
+(** For representable integers, [ZofB] is left inverse of [BofZ]. *)
+
+Theorem ZofBofZ_exact:
+ forall n, integer_representable n -> ZofB (BofZ n) = Some n.
+Proof.
+ intros. destruct (BofZ_representable n H) as (A & B & C).
+ rewrite ZofB_correct. rewrite A, B. f_equal. apply Ztrunc_Z2R.
+Qed.
+
+(** Compatibility with subtraction *)
+
+Remark Zfloor_minus:
+ forall x n, Zfloor (x - Z2R n) = Zfloor x - n.
+Proof.
+ intros. apply Zfloor_imp. replace (Zfloor x - n + 1) with ((Zfloor x + 1) - n) by omega.
+ rewrite ! Z2R_minus. unfold Rminus. split.
+ apply Rplus_le_compat_r. apply Zfloor_lb.
+ apply Rplus_lt_compat_r. rewrite Z2R_plus. apply Zfloor_ub.
+Qed.
+
+Theorem ZofB_minus:
+ forall minus_nan m f p q,
+ ZofB f = Some p -> 0 <= p < 2*q -> q <= 2^prec -> (Z2R q <= B2R _ _ f)%R ->
+ ZofB (Bminus _ _ _ Hmax minus_nan m f (BofZ q)) = Some (p - q).
+Proof.
+ intros.
+ assert (Q: -2^prec <= q <= 2^prec).
+ { split; auto. generalize (Zpower_ge_0 radix2 prec); simpl; omega. }
+ assert (RANGE: (-1 < B2R _ _ f < Z2R (p + 1)%Z)%R) by (apply ZofB_range_nonneg; auto; omega).
+ rewrite ZofB_correct in H. destruct (is_finite prec emax f) eqn:FIN; try discriminate.
+ assert (PQ2: (Z2R (p + 1) <= Z2R q * 2)%R).
+ { change 2%R with (Z2R 2). rewrite <- Z2R_mult. apply Z2R_le. omega. }
+ assert (EXACT: round radix2 fexp (round_mode m) (B2R _ _ f - Z2R q)%R = (B2R _ _ f - Z2R q)%R).
+ { apply round_generic. apply valid_rnd_round_mode.
+ apply sterbenz_aux. apply FLT_exp_monotone. apply generic_format_B2R.
+ apply integer_representable_n. auto. lra. }
+ destruct (BofZ_exact q Q) as (A & B & C).
+ generalize (Bminus_correct _ _ _ Hmax minus_nan m f (BofZ q) FIN B).
+ rewrite Rlt_bool_true.
+- fold emin; fold fexp. intros (D & E & F).
+ rewrite ZofB_correct. rewrite E. rewrite D. rewrite A. rewrite EXACT.
+ inversion H. f_equal. rewrite ! Ztrunc_floor. apply Zfloor_minus.
+ lra. lra.
+- rewrite A. fold emin; fold fexp. rewrite EXACT.
+ apply Rle_lt_trans with (bpow radix2 prec).
+ apply Rle_trans with (Z2R q). apply Rabs_le. lra.
+ rewrite <- Z2R_Zpower. apply Z2R_le; auto. red in prec_gt_0_; omega.
+ apply bpow_lt. auto.
+Qed.
+
+(** A variant of [ZofB] that bounds the range of representable integers. *)
+
+Definition ZofB_range (f: binary_float) (zmin zmax: Z): option Z :=
+ match ZofB f with
+ | None => None
+ | Some z => if Zle_bool zmin z && Zle_bool z zmax then Some z else None
+ end.
+
+Theorem ZofB_range_correct:
+ forall f min max,
+ let n := Ztrunc (B2R _ _ f) in
+ ZofB_range f min max =
+ if is_finite _ _ f && Zle_bool min n && Zle_bool n max then Some n else None.
+Proof.
+ intros. unfold ZofB_range. rewrite ZofB_correct. fold n.
+ destruct (is_finite prec emax f); auto.
+Qed.
+
+Lemma ZofB_range_inversion:
+ forall f min max n,
+ ZofB_range f min max = Some n ->
+ min <= n /\ n <= max /\ ZofB f = Some n.
+Proof.
+ intros. rewrite ZofB_range_correct in H. rewrite ZofB_correct.
+ destruct (is_finite prec emax f); try discriminate.
+ set (n1 := Ztrunc (B2R _ _ f)) in *.
+ destruct (min <=? n1) eqn:MIN; try discriminate.
+ destruct (n1 <=? max) eqn:MAX; try discriminate.
+ simpl in H. inversion H. subst n.
+ split. apply Zle_bool_imp_le; auto.
+ split. apply Zle_bool_imp_le; auto.
+ auto.
+Qed.
+
+Theorem ZofB_range_minus:
+ forall minus_nan m f p q,
+ ZofB_range f 0 (2 * q - 1) = Some p -> q <= 2^prec -> (Z2R q <= B2R _ _ f)%R ->
+ ZofB_range (Bminus _ _ _ Hmax minus_nan m f (BofZ q)) (-q) (q - 1) = Some (p - q).
+Proof.
+ intros. destruct (ZofB_range_inversion _ _ _ _ H) as (A & B & C).
+ set (f' := Bminus prec emax prec_gt_0_ Hmax minus_nan m f (BofZ q)).
+ assert (D: ZofB f' = Some (p - q)).
+ { apply ZofB_minus. auto. omega. auto. auto. }
+ unfold ZofB_range. rewrite D. rewrite Zle_bool_true by omega. rewrite Zle_bool_true by omega. auto.
+Qed.
+
+(** ** Algebraic identities *)
+
+(** Commutativity of addition and multiplication *)
+
+Theorem Bplus_commut:
+ forall plus_nan mode (x y: binary_float),
+ plus_nan x y = plus_nan y x ->
+ Bplus _ _ _ Hmax plus_nan mode x y = Bplus _ _ _ Hmax plus_nan mode y x.
+Proof.
+ intros until y; intros NAN.
+ pose proof (Bplus_correct _ _ _ Hmax plus_nan mode x y).
+ pose proof (Bplus_correct _ _ _ Hmax plus_nan mode y x).
+ unfold Bplus in *; destruct x; destruct y; auto.
+- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB; auto.
+ f_equal; apply eqb_prop; auto.
+- rewrite NAN; auto.
+- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB.
+ f_equal; apply eqb_prop; auto.
+ rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- generalize (H (refl_equal _) (refl_equal _)); clear H.
+ generalize (H0 (refl_equal _) (refl_equal _)); clear H0.
+ fold emin. fold fexp.
+ set (x := B754_finite prec emax b0 m0 e1 e2). set (rx := B2R _ _ x).
+ set (y := B754_finite prec emax b m e e0). set (ry := B2R _ _ y).
+ rewrite (Rplus_comm ry rx). destruct Rlt_bool.
+ + intros (A1 & A2 & A3) (B1 & B2 & B3).
+ apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto.
+ rewrite Z.add_comm. rewrite Z.min_comm. auto.
+ + intros (A1 & A2) (B1 & B2). apply B2FF_inj. rewrite B2 in B1. rewrite <- B1 in A1. auto.
+Qed.
+
+Theorem Bmult_commut:
+ forall mult_nan mode (x y: binary_float),
+ mult_nan x y = mult_nan y x ->
+ Bmult _ _ _ Hmax mult_nan mode x y = Bmult _ _ _ Hmax mult_nan mode y x.
+Proof.
+ intros until y; intros NAN.
+ pose proof (Bmult_correct _ _ _ Hmax mult_nan mode x y).
+ pose proof (Bmult_correct _ _ _ Hmax mult_nan mode y x).
+ unfold Bmult in *; destruct x; destruct y; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite NAN; auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite (xorb_comm b0 b); auto.
+- rewrite NAN; auto.
+- revert H H0. fold emin. fold fexp.
+ set (x := B754_finite prec emax b0 m0 e1 e2). set (rx := B2R _ _ x).
+ set (y := B754_finite prec emax b m e e0). set (ry := B2R _ _ y).
+ rewrite (Rmult_comm ry rx). destruct Rlt_bool.
+ + intros (A1 & A2 & A3) (B1 & B2 & B3).
+ apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto.
+ rewrite ! Bsign_FF2B. f_equal. f_equal. apply xorb_comm. apply Pos.mul_comm. apply Z.add_comm.
+ + intros A B. apply B2FF_inj. etransitivity. eapply A. rewrite xorb_comm. auto.
+Qed.
+
+(** Multiplication by 2 is diagonal addition. *)
+
+Theorem Bmult2_Bplus:
+ forall plus_nan mult_nan mode (f: binary_float),
+ (forall (x y: binary_float),
+ is_nan _ _ x = true -> is_finite _ _ y = true -> plus_nan x x = mult_nan x y) ->
+ Bplus _ _ _ Hmax plus_nan mode f f = Bmult _ _ _ Hmax mult_nan mode f (BofZ 2%Z).
+Proof.
+ intros until f; intros NAN.
+ destruct (BofZ_representable 2) as (A & B & C).
+ apply (integer_representable_2p 1). red in prec_gt_0_; omega.
+ pose proof (Bmult_correct _ _ _ Hmax mult_nan mode f (BofZ 2%Z)). fold emin in H.
+ rewrite A, B, C in H. rewrite xorb_false_r in H.
+ destruct (is_finite _ _ f) eqn:FIN.
+- pose proof (Bplus_correct _ _ _ Hmax plus_nan mode f f FIN FIN). fold emin in H0.
+ assert (EQ: (B2R prec emax f * Z2R 2%Z = B2R prec emax f + B2R prec emax f)%R).
+ { change (Z2R 2%Z) with 2%R. ring. }
+ rewrite <- EQ in H0. destruct Rlt_bool.
+ + destruct H0 as (P & Q & R). destruct H as (S & T & U).
+ apply B2R_Bsign_inj; auto.
+ rewrite P, S. auto.
+ rewrite R, U.
+ replace 0%R with (0 * Z2R 2%Z)%R by ring. rewrite Rcompare_mult_r.
+ rewrite andb_diag, orb_diag. destruct f; try discriminate; simpl.
+ rewrite Rcompare_Eq by auto. destruct mode; auto.
+ replace 0%R with (@F2R radix2 {| Fnum := 0%Z; Fexp := e |}).
+ rewrite Rcompare_F2R. destruct b; auto.
+ unfold F2R. simpl. ring.
+ change 0%R with (Z2R 0%Z). apply Z2R_lt. omega.
+ destruct (Bmult prec emax prec_gt_0_ Hmax mult_nan mode f (BofZ 2)); reflexivity || discriminate.
+ + destruct H0 as (P & Q). apply B2FF_inj. rewrite P, H. auto.
+- destruct f; try discriminate.
+ + simpl Bplus. rewrite eqb_true. destruct (BofZ 2) eqn:B2; try discriminate; simpl in *.
+ assert ((0 = 2)%Z) by (apply eq_Z2R; auto). discriminate.
+ subst b0. rewrite xorb_false_r. auto.
+ auto.
+ + unfold Bplus, Bmult. rewrite <- NAN by auto. auto.
+Qed.
+
+(** Divisions that can be turned into multiplications by an inverse *)
+
+Definition Bexact_inverse_mantissa := Z.iter (prec - 1) xO xH.
+
+Remark Bexact_inverse_mantissa_value:
+ Zpos Bexact_inverse_mantissa = 2 ^ (prec - 1).
+Proof.
+ assert (REC: forall n, Z.pos (nat_iter n xO xH) = 2 ^ (Z.of_nat n)).
+ { induction n. reflexivity.
+ simpl nat_iter. transitivity (2 * Z.pos (nat_iter n xO xH)). reflexivity.
+ rewrite inj_S. rewrite IHn. unfold Z.succ. rewrite Zpower_plus by omega.
+ change (2 ^ 1) with 2. ring. }
+ red in prec_gt_0_.
+ unfold Bexact_inverse_mantissa. rewrite iter_nat_of_Z by omega. rewrite REC.
+ rewrite Zabs2Nat.id_abs. rewrite Z.abs_eq by omega. auto.
+Qed.
+
+Remark Bexact_inverse_mantissa_digits2_Pnat:
+ digits2_Pnat Bexact_inverse_mantissa = Z.to_nat (prec - 1).
+Proof.
+ assert (DIGITS: forall n, digits2_Pnat (nat_iter n xO xH) = n).
+ { induction n; simpl. auto. congruence. }
+ red in prec_gt_0_.
+ unfold Bexact_inverse_mantissa. rewrite iter_nat_of_Z by omega. rewrite DIGITS.
+ apply Zabs2Nat.abs_nat_nonneg. omega.
+Qed.
+
+Remark bounded_Bexact_inverse:
+ forall e,
+ emin <= e <= emax - prec <-> bounded prec emax Bexact_inverse_mantissa e = true.
+Proof.
+ intros. unfold bounded, canonic_mantissa. rewrite andb_true_iff.
+ rewrite <- Zeq_is_eq_bool. rewrite <- Zle_is_le_bool.
+ rewrite Bexact_inverse_mantissa_digits2_Pnat.
+ rewrite inj_S. red in prec_gt_0_. rewrite Z2Nat.id by omega.
+ split.
+- intros; split. unfold FLT_exp. unfold emin in H. zify; omega. omega.
+- intros [A B]. unfold FLT_exp in A. unfold emin. zify; omega.
+Qed.
+
+Program Definition Bexact_inverse (f: binary_float) : option binary_float :=
+ match f with
+ | B754_finite s m e B =>
+ if positive_eq_dec m Bexact_inverse_mantissa then
+ let e' := -e - (prec - 1) * 2 in
+ if Z_le_dec emin e' then
+ if Z_le_dec e' emax then
+ Some(B754_finite _ _ s m e' _)
+ else None else None else None
+ | _ => None
+ end.
+Next Obligation.
+ rewrite <- bounded_Bexact_inverse in B. rewrite <- bounded_Bexact_inverse.
+ unfold emin in *. omega.
+Qed.
+
+Lemma Bexact_inverse_correct:
+ forall f f', Bexact_inverse f = Some f' ->
+ is_finite_strict _ _ f = true
+ /\ is_finite_strict _ _ f' = true
+ /\ B2R _ _ f' = (/ B2R _ _ f)%R
+ /\ B2R _ _ f <> 0%R
+ /\ Bsign _ _ f' = Bsign _ _ f.
+Proof with (try discriminate).
+ intros f f' EI. unfold Bexact_inverse in EI. destruct f...
+ destruct (Pos.eq_dec m Bexact_inverse_mantissa)...
+ set (e' := -e - (prec - 1) * 2) in *.
+ destruct (Z_le_dec emin e')...
+ destruct (Z_le_dec e' emax)...
+ inversion EI; clear EI; subst f' m.
+ split. auto. split. auto. split. unfold B2R. rewrite Bexact_inverse_mantissa_value.
+ unfold F2R; simpl. rewrite Z2R_cond_Zopp.
+ rewrite <- ! cond_Ropp_mult_l.
+ red in prec_gt_0_.
+ replace (Z2R (2 ^ (prec - 1))) with (bpow radix2 (prec - 1))
+ by (symmetry; apply (Z2R_Zpower radix2); omega).
+ rewrite <- ! bpow_plus.
+ replace (prec - 1 + e') with (- (prec - 1 + e)) by (unfold e'; omega).
+ rewrite bpow_opp. unfold cond_Ropp; destruct b; auto.
+ rewrite Ropp_inv_permute. auto. apply Rgt_not_eq. apply bpow_gt_0.
+ split. simpl. red; intros. apply F2R_eq_0_reg in H. destruct b; simpl in H; discriminate.
+ auto.
+Qed.
+
+Theorem Bdiv_mult_inverse:
+ forall div_nan mult_nan mode x y z,
+ (forall (x y z: binary_float),
+ is_nan _ _ x = true -> is_finite _ _ y = true -> is_finite _ _ z = true ->
+ div_nan x y = mult_nan x z) ->
+ Bexact_inverse y = Some z ->
+ Bdiv _ _ _ Hmax div_nan mode x y = Bmult _ _ _ Hmax mult_nan mode x z.
+Proof.
+ intros until z; intros NAN; intros. destruct (Bexact_inverse_correct _ _ H) as (A & B & C & D & E).
+ pose proof (Bmult_correct _ _ _ Hmax mult_nan mode x z).
+ fold emin in H0. fold fexp in H0.
+ pose proof (Bdiv_correct _ _ _ Hmax div_nan mode x y D).
+ fold emin in H1. fold fexp in H1.
+ unfold Rdiv in H1. rewrite <- C in H1.
+ destruct (is_finite _ _ x) eqn:FINX.
+- destruct Rlt_bool.
+ + destruct H0 as (P & Q & R). destruct H1 as (S & T & U).
+ apply B2R_Bsign_inj; auto.
+ rewrite Q. simpl. apply is_finite_strict_finite; auto.
+ rewrite P, S. auto.
+ rewrite R, U, E. auto.
+ apply is_finite_not_is_nan; auto.
+ apply is_finite_not_is_nan. rewrite Q. simpl. apply is_finite_strict_finite; auto. + apply B2FF_inj. rewrite H0, H1. rewrite E. auto.
+- destruct y; try discriminate. destruct z; try discriminate.
+ destruct x; try discriminate; simpl.
+ + simpl in E; congruence.
+ + erewrite NAN; eauto.
+Qed.
+
+End Extra_ops.
+
+(** ** Conversions between two FP formats *)
+
+Section Conversions.
+
+Variable prec1 emax1 prec2 emax2 : Z.
+Context (prec1_gt_0_ : Prec_gt_0 prec1) (prec2_gt_0_ : Prec_gt_0 prec2).
+Let emin1 := (3 - emax1 - prec1)%Z.
+Let fexp1 := FLT_exp emin1 prec1.
+Let emin2 := (3 - emax2 - prec2)%Z.
+Let fexp2 := FLT_exp emin2 prec2.
+Hypothesis Hmax1 : (prec1 < emax1)%Z.
+Hypothesis Hmax2 : (prec2 < emax2)%Z.
+Let binary_float1 := binary_float prec1 emax1.
+Let binary_float2 := binary_float prec2 emax2.
+
+Definition Bconv (conv_nan: bool -> nan_pl prec1 -> bool * nan_pl prec2) (md: mode) (f: binary_float1) : binary_float2 :=
+ match f with
+ | B754_nan s pl => let '(s, pl) := conv_nan s pl in B754_nan _ _ s pl
+ | B754_infinity s => B754_infinity _ _ s
+ | B754_zero s => B754_zero _ _ s
+ | B754_finite s m e _ => binary_normalize _ _ _ Hmax2 md (cond_Zopp s (Zpos m)) e s
+ end.
+
+Theorem Bconv_correct:
+ forall conv_nan m f,
+ is_finite _ _ f = true ->
+ if Rlt_bool (Rabs (round radix2 fexp2 (round_mode m) (B2R _ _ f))) (bpow radix2 emax2)
+ then
+ B2R _ _ (Bconv conv_nan m f) = round radix2 fexp2 (round_mode m) (B2R _ _ f)
+ /\ is_finite _ _ (Bconv conv_nan m f) = true
+ /\ Bsign _ _ (Bconv conv_nan m f) = Bsign _ _ f
+ else
+ B2FF _ _ (Bconv conv_nan m f) = binary_overflow prec2 emax2 m (Bsign _ _ f).
+Proof.
+ intros. destruct f; try discriminate.
+- simpl. rewrite round_0. rewrite Rabs_R0. rewrite Rlt_bool_true. auto.
+ apply bpow_gt_0. apply valid_rnd_round_mode.
+- generalize (binary_normalize_correct _ _ _ Hmax2 m (cond_Zopp b (Zpos m0)) e b).
+ fold emin2; fold fexp2. simpl. destruct Rlt_bool.
+ + intros (A & B & C). split. auto. split. auto. rewrite C.
+ destruct b; simpl.
+ rewrite Rcompare_Lt. auto. apply F2R_lt_0_compat. simpl. compute; auto.
+ rewrite Rcompare_Gt. auto. apply F2R_gt_0_compat. simpl. compute; auto.
+ + intros A. rewrite A. f_equal. destruct b.
+ apply Rlt_bool_true. apply F2R_lt_0_compat. simpl. compute; auto.
+ apply Rlt_bool_false. apply Rlt_le. apply Rgt_lt. apply F2R_gt_0_compat. simpl. compute; auto.
+Qed.
+
+(** Converting a finite FP number to higher or equal precision preserves its value. *)
+
+Theorem Bconv_widen_exact:
+ (prec2 >= prec1)%Z -> (emax2 >= emax1)%Z ->
+ forall conv_nan m f,
+ is_finite _ _ f = true ->
+ B2R _ _ (Bconv conv_nan m f) = B2R _ _ f
+ /\ is_finite _ _ (Bconv conv_nan m f) = true
+ /\ Bsign _ _ (Bconv conv_nan m f) = Bsign _ _ f.
+Proof.
+ intros PREC EMAX; intros. generalize (Bconv_correct conv_nan m f H).
+ assert (LT: (Rabs (B2R _ _ f) < bpow radix2 emax2)%R).
+ {
+ destruct f; try discriminate; simpl.
+ rewrite Rabs_R0. apply bpow_gt_0.
+ apply Rlt_le_trans with (bpow radix2 emax1).
+ rewrite F2R_cond_Zopp. rewrite abs_cond_Ropp. rewrite <- F2R_Zabs. simpl Z.abs.
+ eapply bounded_lt_emax; eauto.
+ apply bpow_le. omega.
+ }
+ assert (EQ: round radix2 fexp2 (round_mode m) (B2R prec1 emax1 f) = B2R prec1 emax1 f).
+ {
+ apply round_generic. apply valid_rnd_round_mode. eapply generic_inclusion_le.
+ 5: apply generic_format_B2R. apply fexp_correct; auto. apply fexp_correct; auto.
+ instantiate (1 := emax2). intros. unfold fexp2, FLT_exp. unfold emin2. zify; omega.
+ apply Rlt_le; auto.
+ }
+ rewrite EQ. rewrite Rlt_bool_true by auto. auto.
+Qed.
+
+(** Conversion from integers and change of format *)
+
+Theorem Bconv_BofZ:
+ forall conv_nan n,
+ integer_representable prec1 emax1 n ->
+ Bconv conv_nan mode_NE (BofZ prec1 emax1 _ Hmax1 n) = BofZ prec2 emax2 _ Hmax2 n.
+Proof.
+ intros.
+ destruct (BofZ_representable _ _ _ Hmax1 n H) as (A & B & C).
+ set (f := BofZ prec1 emax1 prec1_gt_0_ Hmax1 n) in *.
+ generalize (Bconv_correct conv_nan mode_NE f B).
+ unfold BofZ.
+ generalize (binary_normalize_correct _ _ _ Hmax2 mode_NE n 0 false).
+ fold emin2; fold fexp2. rewrite A.
+ replace (F2R {| Fnum := n; Fexp := 0 |}) with (Z2R n).
+ destruct Rlt_bool.
+- intros (P & Q & R) (D & E & F). apply B2R_Bsign_inj; auto.
+ congruence. rewrite F, C, R. change 0%R with (Z2R 0). rewrite Rcompare_Z2R.
+ unfold Zlt_bool. auto.
+- intros P Q. apply B2FF_inj. rewrite P, Q. rewrite C. f_equal. change 0%R with (Z2R 0).
+ generalize (Zlt_bool_spec n 0); intros LT; inversion LT.
+ rewrite Rlt_bool_true; auto. apply Z2R_lt; auto.
+ rewrite Rlt_bool_false; auto. apply Z2R_le; auto.
+- unfold F2R; simpl. rewrite Rmult_1_r. auto.
+Qed.
+
+(** Change of format (to higher precision) and conversion to integer. *)
+
+Theorem ZofB_Bconv:
+ prec2 >= prec1 -> emax2 >= emax1 ->
+ forall conv_nan m f n,
+ ZofB _ _ f = Some n -> ZofB _ _ (Bconv conv_nan m f) = Some n.
+Proof.
+ intros. rewrite ZofB_correct in H1. destruct (is_finite _ _ f) eqn:FIN; inversion H1.
+ destruct (Bconv_widen_exact H H0 conv_nan m f) as (A & B & C). auto.
+ rewrite ZofB_correct. rewrite B. rewrite A. auto.
+Qed.
+
+Theorem ZofB_range_Bconv:
+ forall min1 max1 min2 max2,
+ prec2 >= prec1 -> emax2 >= emax1 -> min2 <= min1 -> max1 <= max2 ->
+ forall conv_nan m f n,
+ ZofB_range _ _ f min1 max1 = Some n ->
+ ZofB_range _ _ (Bconv conv_nan m f) min2 max2 = Some n.
+Proof.
+ intros.
+ destruct (ZofB_range_inversion _ _ _ _ _ _ H3) as (A & B & C).
+ unfold ZofB_range. erewrite ZofB_Bconv by eauto.
+ rewrite ! Zle_bool_true by omega. auto.
+Qed.
+
+(** Change of format (to higher precision) and comparison. *)
+
+Theorem Bcompare_Bconv_widen:
+ prec2 >= prec1 -> emax2 >= emax1 ->
+ forall conv_nan m x y,
+ Bcompare _ _ (Bconv conv_nan m x) (Bconv conv_nan m y) = Bcompare _ _ x y.
+Proof.
+ intros. destruct (is_finite _ _ x && is_finite _ _ y) eqn:FIN.
+- apply andb_true_iff in FIN. destruct FIN.
+ destruct (Bconv_widen_exact H H0 conv_nan m x H1) as (A & B & C).
+ destruct (Bconv_widen_exact H H0 conv_nan m y H2) as (D & E & F).
+ rewrite ! Bcompare_finite_correct by auto. rewrite A, D. auto.
+- generalize (Bconv_widen_exact H H0 conv_nan m x)
+ (Bconv_widen_exact H H0 conv_nan m y); intros P Q.
+ destruct x, y; try discriminate; simpl in P, Q; simpl;
+ repeat (match goal with |- context [conv_nan ?b ?pl] => destruct (conv_nan b pl) end);
+ auto.
+ destruct Q as (D & E & F); auto.
+ destruct (binary_normalize prec2 emax2 prec2_gt_0_ Hmax2 m (cond_Zopp b0 (Z.pos m0)) e b0);
+ discriminate || reflexivity.
+ destruct P as (A & B & C); auto.
+ destruct (binary_normalize prec2 emax2 prec2_gt_0_ Hmax2 m (cond_Zopp b (Z.pos m0)) e b);
+ try discriminate; simpl. destruct b; auto. destruct b, b1; auto.
+ destruct P as (A & B & C); auto.
+ destruct (binary_normalize prec2 emax2 prec2_gt_0_ Hmax2 m (cond_Zopp b (Z.pos m0)) e b);
+ try discriminate; simpl. destruct b; auto.
+ destruct b, b2; auto.
+Qed.
+
+End Conversions.
+
+Section Compose_Conversions.
+
+Variable prec1 emax1 prec2 emax2 : Z.
+Context (prec1_gt_0_ : Prec_gt_0 prec1) (prec2_gt_0_ : Prec_gt_0 prec2).
+Let emin1 := (3 - emax1 - prec1)%Z.
+Let fexp1 := FLT_exp emin1 prec1.
+Let emin2 := (3 - emax2 - prec2)%Z.
+Let fexp2 := FLT_exp emin2 prec2.
+Hypothesis Hmax1 : (prec1 < emax1)%Z.
+Hypothesis Hmax2 : (prec2 < emax2)%Z.
+Let binary_float1 := binary_float prec1 emax1.
+Let binary_float2 := binary_float prec2 emax2.
+
+(** Converting to a higher precision then down to the original format
+ is the identity. *)
+Theorem Bconv_narrow_widen:
+ prec2 >= prec1 -> emax2 >= emax1 ->
+ forall narrow_nan widen_nan m f,
+ is_nan _ _ f = false ->
+ Bconv prec2 emax2 prec1 emax1 _ Hmax1 narrow_nan m (Bconv prec1 emax1 prec2 emax2 _ Hmax2 widen_nan m f) = f.
+Proof.
+ intros. destruct (is_finite _ _ f) eqn:FIN.
+- assert (EQ: round radix2 fexp1 (round_mode m) (B2R prec1 emax1 f) = B2R prec1 emax1 f).
+ { apply round_generic. apply valid_rnd_round_mode. apply generic_format_B2R. }
+ generalize (Bconv_widen_exact _ _ _ _ _ _ Hmax2 H H0 widen_nan m f FIN).
+ set (f' := Bconv prec1 emax1 prec2 emax2 _ Hmax2 widen_nan m f).
+ intros (A & B & C).
+ generalize (Bconv_correct _ _ _ _ _ Hmax1 narrow_nan m f' B).
+ fold emin1. fold fexp1. rewrite A, C, EQ. rewrite Rlt_bool_true.
+ intros (D & E & F).
+ apply B2R_Bsign_inj; auto.
+ destruct f; try discriminate; simpl.
+ rewrite Rabs_R0. apply bpow_gt_0.
+ rewrite F2R_cond_Zopp. rewrite abs_cond_Ropp. rewrite <- F2R_Zabs. simpl Z.abs.
+ eapply bounded_lt_emax; eauto.
+- destruct f; try discriminate. simpl. auto.
+Qed.
+
+End Compose_Conversions.
+
+(** Specialization to binary32 and binary64 formats. *)
+
+Require Import Fappli_IEEE_bits.
+
+Section B3264.
+
+Let prec32 : (0 < 24)%Z.
+apply refl_equal.
+Qed.
+
+Let emax32 : (24 < 128)%Z.
+apply refl_equal.
+Qed.
+
+Let prec64 : (0 < 53)%Z.
+apply refl_equal.
+Qed.
+
+Let emax64 : (53 < 1024)%Z.
+apply refl_equal.
+Qed.
+
+Definition b32_abs : (bool -> nan_pl 24 -> bool * nan_pl 24) -> binary32 -> binary32 := Babs 24 128.
+Definition b32_eq_dec : forall (f1 f2: binary32), {f1=f2} + {f1<>f2} := Beq_dec 24 128.
+Definition b32_compare : binary32 -> binary32 -> option comparison := Bcompare 24 128.
+Definition b32_of_Z : Z -> binary32 := BofZ 24 128 prec32 emax32.
+Definition b32_to_Z : binary32 -> option Z := ZofB 24 128.
+Definition b32_to_Z_range : binary32 -> Z -> Z -> option Z := ZofB_range 24 128.
+Definition b32_exact_inverse : binary32 -> option binary32 := Bexact_inverse 24 128 prec32.
+
+Definition b64_abs : (bool -> nan_pl 53 -> bool * nan_pl 53) -> binary64 -> binary64 := Babs 53 1024.
+Definition b64_eq_dec : forall (f1 f2: binary64), {f1=f2} + {f1<>f2} := Beq_dec 53 1024.
+Definition b64_compare : binary64 -> binary64 -> option comparison := Bcompare 53 1024.
+Definition b64_of_Z : Z -> binary64 := BofZ 53 1024 prec64 emax64.
+Definition b64_to_Z : binary64 -> option Z := ZofB 53 1024.
+Definition b64_to_Z_range : binary64 -> Z -> Z -> option Z := ZofB_range 53 1024.
+Definition b64_exact_inverse : binary64 -> option binary64 := Bexact_inverse 53 1024 prec64.
+
+Definition b64_of_b32 : (bool -> nan_pl 24 -> bool * nan_pl 53) -> mode -> binary32 -> binary64 :=
+ Bconv 24 128 53 1024 prec32 prec64.
+Definition b32_of_b64 : (bool -> nan_pl 53 -> bool * nan_pl 24) -> mode -> binary64 -> binary32 :=
+ Bconv 53 1024 24 128 prec64 prec32.
+
+End B3264.
diff --git a/lib/Floats.v b/lib/Floats.v
index 35009d8..bbc2a92 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -16,47 +16,121 @@
(** Formalization of floating-point numbers, using the Flocq library. *)
-Require Import Axioms.
Require Import Coqlib.
Require Import Integers.
Require Import Fappli_IEEE.
Require Import Fappli_IEEE_bits.
+Require Import Fappli_IEEE_extra.
Require Import Fcore.
-Require Import Fcalc_round.
-Require Import Fcalc_bracket.
-Require Import Fprop_Sterbenz.
Require Import Program.
Require Archi.
Close Scope R_scope.
-Definition float := binary64. (**r the type of IEE754 doubles *)
+Definition float := binary64. (**r the type of IEE754 double-precision FP numbers *)
+Definition float32 := binary32. (**r the type of IEE754 single-precision FP numbers *)
+
+(** Boolean-valued comparisons *)
+
+Definition cmp_of_comparison (c: comparison) (x: option Datatypes.comparison) : bool :=
+ match c with
+ | Ceq =>
+ match x with Some Eq => true | _ => false end
+ | Cne =>
+ match x with Some Eq => false | _ => true end
+ | Clt =>
+ match x with Some Lt => true | _ => false end
+ | Cle =>
+ match x with Some(Lt|Eq) => true | _ => false end
+ | Cgt =>
+ match x with Some Gt => true | _ => false end
+ | Cge =>
+ match x with Some(Gt|Eq) => true | _ => false end
+ end.
+
+Lemma cmp_of_comparison_swap:
+ forall c x,
+ cmp_of_comparison (swap_comparison c) x =
+ cmp_of_comparison c (match x with None => None | Some x => Some (CompOpp x) end).
+Proof.
+ intros. destruct c; destruct x as [[]|]; reflexivity.
+Qed.
+
+Lemma cmp_of_comparison_ne_eq:
+ forall x, cmp_of_comparison Cne x = negb (cmp_of_comparison Ceq x).
+Proof.
+ intros. destruct x as [[]|]; reflexivity.
+Qed.
+
+Lemma cmp_of_comparison_lt_eq_false:
+ forall x, cmp_of_comparison Clt x = true -> cmp_of_comparison Ceq x = true -> False.
+Proof.
+ destruct x as [[]|]; simpl; intros; discriminate.
+Qed.
+
+Lemma cmp_of_comparison_le_lt_eq:
+ forall x, cmp_of_comparison Cle x = cmp_of_comparison Clt x || cmp_of_comparison Ceq x.
+Proof.
+ destruct x as [[]|]; reflexivity.
+Qed.
+
+Lemma cmp_of_comparison_gt_eq_false:
+ forall x, cmp_of_comparison Cgt x = true -> cmp_of_comparison Ceq x = true -> False.
+Proof.
+ destruct x as [[]|]; simpl; intros; discriminate.
+Qed.
+
+Lemma cmp_of_comparison_ge_gt_eq:
+ forall x, cmp_of_comparison Cge x = cmp_of_comparison Cgt x || cmp_of_comparison Ceq x.
+Proof.
+ destruct x as [[]|]; reflexivity.
+Qed.
+
+Lemma cmp_of_comparison_lt_gt_false:
+ forall x, cmp_of_comparison Clt x = true -> cmp_of_comparison Cgt x = true -> False.
+Proof.
+ destruct x as [[]|]; simpl; intros; discriminate.
+Qed.
+
+(** Function used to parse floats *)
+
+Program Definition build_from_parsed
+ (prec:Z) (emax:Z) (prec_gt_0 : Prec_gt_0 prec) (Hmax:prec < emax)
+ (base:positive) (intPart:positive) (expPart:Z) :=
+ match expPart return _ with
+ | Z0 =>
+ binary_normalize prec emax prec_gt_0 Hmax mode_NE (Zpos intPart) Z0 false
+ | Zpos p =>
+ binary_normalize prec emax prec_gt_0 Hmax mode_NE ((Zpos intPart) * Zpower_pos (Zpos base) p) Z0 false
+ | Zneg p =>
+ let exp := Zpower_pos (Zpos base) p in
+ match exp return 0 < exp -> _ with
+ | Zneg _ | Z0 => _
+ | Zpos p =>
+ fun _ =>
+ FF2B prec emax _ (proj1 (Bdiv_correct_aux prec emax prec_gt_0 Hmax mode_NE false intPart Z0 false p Z0))
+ end _
+ end.
+Next Obligation.
+ apply Zpower_pos_gt_0. reflexivity.
+Qed.
+
+Local Notation __ := (refl_equal Datatypes.Lt).
+
+Local Hint Extern 1 (Prec_gt_0 _) => exact (refl_equal Datatypes.Lt).
+Local Hint Extern 1 (_ < _) => exact (refl_equal Datatypes.Lt).
+
+(** * Double-precision FP numbers *)
Module Float.
-Definition zero: float := B754_zero _ _ false. (**r the float [+0.0] *)
+(** ** NaN payload manipulations *)
-Definition eq_dec: forall (f1 f2: float), {f1 = f2} + {f1 <> f2}.
-Proof.
- Ltac try_not_eq := try solve [right; congruence].
- destruct f1 as [| |? []|], f2 as [| |? []|];
- try destruct b; try destruct b0;
- try solve [left; auto]; try_not_eq.
- destruct (positive_eq_dec x x0); try_not_eq;
- subst; left; f_equal; f_equal; apply proof_irr.
- destruct (positive_eq_dec x x0); try_not_eq;
- subst; left; f_equal; f_equal; apply proof_irr.
- destruct (positive_eq_dec m m0); try_not_eq;
- destruct (Z_eq_dec e e1); try solve [right; intro H; inv H; congruence];
- subst; left; rewrite (proof_irr e0 e2); auto.
- destruct (positive_eq_dec m m0); try_not_eq;
- destruct (Z_eq_dec e e1); try solve [right; intro H; inv H; congruence];
- subst; left; rewrite (proof_irr e0 e2); auto.
-Defined.
+(** The following definitions are not part of the IEEE754 standard but
+ apply to all architectures supported by CompCert. *)
+
+(** Transform a Nan payload to a quiet Nan payload. *)
-(* Transform a Nan payload to a quiet Nan payload.
- This is not part of the IEEE754 standard, but shared between all
- architectures of Compcert. *)
Program Definition transform_quiet_pl (pl:nan_pl 53) : nan_pl 53 :=
Pos.lor pl (nat_iter 51 xO xH).
Next Obligation.
@@ -91,39 +165,10 @@ Proof.
simpl. apply lor_idempotent.
Qed.
-(** Arithmetic operations *)
-
-(* The Nan payload operations for neg and abs is not part of the IEEE754
- standard, but shared between all architectures of Compcert. *)
-Definition neg_pl (s:bool) (pl:nan_pl 53) := (negb s, pl).
-Definition abs_pl (s:bool) (pl:nan_pl 53) := (false, pl).
-
-Definition neg: float -> float := b64_opp neg_pl. (**r opposite (change sign) *)
-Definition abs (x: float): float := (**r absolute value (set sign to [+]) *)
- match x with
- | B754_nan s pl => let '(s, pl) := abs_pl s pl in B754_nan _ _ s pl
- | B754_infinity _ => B754_infinity _ _ false
- | B754_finite _ m e H => B754_finite _ _ false m e H
- | B754_zero _ => B754_zero _ _ false
- end.
-
-Definition binary_normalize64 (m e:Z) (s:bool): float :=
- binary_normalize 53 1024 eq_refl eq_refl mode_NE m e s.
+(** Nan payload operations for single <-> double conversions. *)
-Definition binary_normalize64_correct (m e:Z) (s:bool) :=
- binary_normalize_correct 53 1024 eq_refl eq_refl mode_NE m e s.
-Global Opaque binary_normalize64_correct.
-
-Definition binary_normalize32 (m e:Z) (s:bool) : binary32 :=
- binary_normalize 24 128 eq_refl eq_refl mode_NE m e s.
-
-Definition binary_normalize32_correct (m e:Z) (s:bool) :=
- binary_normalize_correct 24 128 eq_refl eq_refl mode_NE m e s.
-Global Opaque binary_normalize32_correct.
-
-(* The Nan payload operations for single <-> double conversions are not part of
- the IEEE754 standard, but shared between all architectures of Compcert. *)
-Definition floatofbinary32_pl (s:bool) (pl:nan_pl 24) : (bool * nan_pl 53).
+Definition of_single_pl (s:bool) (pl:nan_pl 24) : (bool * nan_pl 53).
+Proof.
Proof.
refine (s, transform_quiet_pl (exist _ (Pos.shiftl_nat (proj1_sig pl) 29) _)).
abstract (
@@ -133,7 +178,8 @@ Proof.
zify; omega).
Defined.
-Definition binary32offloat_pl (s:bool) (pl:nan_pl 53) : (bool * nan_pl 24).
+Definition to_single_pl (s:bool) (pl:nan_pl 53) : (bool * nan_pl 24).
+Proof.
Proof.
refine (s, exist _ (Pos.shiftr_nat (proj1_sig (transform_quiet_pl pl)) 29) _).
abstract (
@@ -144,128 +190,14 @@ Proof.
rewrite !H, <- !NPeano.Nat.sub_add_distr; zify; omega).
Defined.
-Definition floatofbinary32 (f: binary32) : float := (**r single precision embedding in double precision *)
- match f with
- | B754_nan s pl => let '(s, pl) := floatofbinary32_pl s pl in B754_nan _ _ s pl
- | B754_infinity s => B754_infinity _ _ s
- | B754_zero s => B754_zero _ _ s
- | B754_finite s m e _ =>
- binary_normalize64 (cond_Zopp s (Zpos m)) e s
- end.
-
-Definition binary32offloat (f: float) : binary32 := (**r conversion to single precision *)
- match f with
- | B754_nan s pl => let '(s, pl) := binary32offloat_pl s pl in B754_nan _ _ s pl
- | B754_infinity s => B754_infinity _ _ s
- | B754_zero s => B754_zero _ _ s
- | B754_finite s m e _ =>
- binary_normalize32 (cond_Zopp s (Zpos m)) e s
- end.
-
-Definition singleoffloat (f: float): float := (**r conversion to single precision, embedded in double *)
- floatofbinary32 (binary32offloat f).
-
-Definition Zoffloat (f:float): option Z := (**r conversion to Z *)
- match f with
- | B754_finite s m (Zpos e) _ => Some (cond_Zopp s (Zpos m) * Zpower_pos radix2 e)
- | B754_finite s m 0 _ => Some (cond_Zopp s (Zpos m))
- | B754_finite s m (Zneg e) _ => Some (cond_Zopp s (Zpos m / Zpower_pos radix2 e))
- | B754_zero _ => Some 0
- | _ => None
- end.
-
-Definition intoffloat (f:float): option int := (**r conversion to signed 32-bit int *)
- match Zoffloat f with
- | Some n =>
- if Zle_bool Int.min_signed n && Zle_bool n Int.max_signed then
- Some (Int.repr n)
- else
- None
- | None => None
- end.
-
-Definition intuoffloat (f:float): option int := (**r conversion to unsigned 32-bit int *)
- match Zoffloat f with
- | Some n =>
- if Zle_bool 0 n && Zle_bool n Int.max_unsigned then
- Some (Int.repr n)
- else
- None
- | None => None
- end.
-
-Definition longoffloat (f:float): option int64 := (**r conversion to signed 64-bit int *)
- match Zoffloat f with
- | Some n =>
- if Zle_bool Int64.min_signed n && Zle_bool n Int64.max_signed then
- Some (Int64.repr n)
- else
- None
- | None => None
- end.
-
-Definition longuoffloat (f:float): option int64 := (**r conversion to unsigned 64-bit int *)
- match Zoffloat f with
- | Some n =>
- if Zle_bool 0 n && Zle_bool n Int64.max_unsigned then
- Some (Int64.repr n)
- else
- None
- | None => None
- end.
-
-(* Functions used to parse floats *)
-Program Definition build_from_parsed
- (prec:Z) (emax:Z) (prec_gt_0 :Prec_gt_0 prec) (Hmax:prec < emax)
- (base:positive) (intPart:positive) (expPart:Z) :=
- match expPart return _ with
- | Z0 =>
- binary_normalize prec emax prec_gt_0 Hmax mode_NE (Zpos intPart) Z0 false
- | Zpos p =>
- binary_normalize prec emax prec_gt_0 Hmax mode_NE ((Zpos intPart) * Zpower_pos (Zpos base) p) Z0 false
- | Zneg p =>
- let exp := Zpower_pos (Zpos base) p in
- match exp return 0 < exp -> _ with
- | Zneg _ | Z0 => _
- | Zpos p =>
- fun _ =>
- FF2B prec emax _ (proj1 (Bdiv_correct_aux prec emax prec_gt_0 Hmax mode_NE false intPart Z0 false p Z0))
- end _
- end.
-Next Obligation.
-apply Zpower_pos_gt_0.
-reflexivity.
-Qed.
-
-Definition build_from_parsed64 (base:positive) (intPart:positive) (expPart:Z) : float :=
- build_from_parsed 53 1024 eq_refl eq_refl base intPart expPart.
+(** NaN payload operations for opposite and absolute value. *)
-Definition build_from_parsed32 (base:positive) (intPart:positive) (expPart:Z) : float :=
- floatofbinary32 (build_from_parsed 24 128 eq_refl eq_refl base intPart expPart).
-
-Definition floatofint (n:int): float := (**r conversion from signed 32-bit int *)
- binary_normalize64 (Int.signed n) 0 false.
-Definition floatofintu (n:int): float:= (**r conversion from unsigned 32-bit int *)
- binary_normalize64 (Int.unsigned n) 0 false.
-
-Definition floatoflong (n:int64): float := (**r conversion from signed 64-bit int *)
- binary_normalize64 (Int64.signed n) 0 false.
-Definition floatoflongu (n:int64): float:= (**r conversion from unsigned 64-bit int *)
- binary_normalize64 (Int64.unsigned n) 0 false.
-
-Definition singleofint (n:int): float := (**r conversion from signed 32-bit int to single-precision float *)
- floatofbinary32 (binary_normalize32 (Int.signed n) 0 false).
-Definition singleofintu (n:int): float:= (**r conversion from unsigned 32-bit int to single-precision float *)
- floatofbinary32 (binary_normalize32 (Int.unsigned n) 0 false).
-
-Definition singleoflong (n:int64): float := (**r conversion from signed 64-bit int to single-precision float *)
- floatofbinary32 (binary_normalize32 (Int64.signed n) 0 false).
-Definition singleoflongu (n:int64): float:= (**r conversion from unsigned 64-bit int to single-precision float *)
- floatofbinary32 (binary_normalize32 (Int64.unsigned n) 0 false).
+Definition neg_pl (s:bool) (pl:nan_pl 53) := (negb s, pl).
+Definition abs_pl (s:bool) (pl:nan_pl 53) := (false, pl).
-(* The Nan payload operations for two-argument arithmetic operations are not part of
- the IEEE754 standard, but all architectures of Compcert share a similar
- NaN behavior, parameterized by:
+(** The NaN payload operations for two-argument arithmetic operations
+ are not part of the IEEE754 standard, but all architectures of
+ Compcert share a similar NaN behavior, parameterized by:
- a "default" payload which occurs when an operation generates a NaN from
non-NaN arguments;
- a choice function determining which of the payload arguments to choose,
@@ -274,78 +206,67 @@ Definition singleoflongu (n:int64): float:= (**r conversion from unsigned 64-bit
Definition binop_pl (x y: binary64) : bool*nan_pl 53 :=
match x, y with
| B754_nan s1 pl1, B754_nan s2 pl2 =>
- if Archi.choose_binop_pl s1 pl1 s2 pl2
+ if Archi.choose_binop_pl_64 s1 pl1 s2 pl2
then (s2, transform_quiet_pl pl2)
else (s1, transform_quiet_pl pl1)
| B754_nan s1 pl1, _ => (s1, transform_quiet_pl pl1)
| _, B754_nan s2 pl2 => (s2, transform_quiet_pl pl2)
- | _, _ => Archi.default_pl
+ | _, _ => Archi.default_pl_64
end.
+(** ** Operations over double-precision floats *)
+
+Definition zero: float := B754_zero _ _ false. (**r the float [+0.0] *)
+
+Definition eq_dec: forall (f1 f2: float), {f1 = f2} + {f1 <> f2} := b64_eq_dec.
+
+(** Arithmetic operations *)
+
+Definition neg: float -> float := b64_opp neg_pl. (**r opposite (change sign) *)
+Definition abs: float -> float := b64_abs abs_pl. (**r absolute value (set sign to [+]) *)
Definition add: float -> float -> float := b64_plus binop_pl mode_NE. (**r addition *)
Definition sub: float -> float -> float := b64_minus binop_pl mode_NE. (**r subtraction *)
Definition mul: float -> float -> float := b64_mult binop_pl mode_NE. (**r multiplication *)
Definition div: float -> float -> float := b64_div binop_pl mode_NE. (**r division *)
+Definition cmp (c:comparison) (f1 f2: float) : bool := (**r comparison *)
+ cmp_of_comparison c (b64_compare f1 f2).
-Definition order_float (f1 f2:float): option Datatypes.comparison :=
- match f1, f2 with
- | B754_nan _ _,_ | _,B754_nan _ _ => None
- | B754_infinity true, B754_infinity true
- | B754_infinity false, B754_infinity false => Some Eq
- | B754_infinity true, _ => Some Lt
- | B754_infinity false, _ => Some Gt
- | _, B754_infinity true => Some Gt
- | _, B754_infinity false => Some Lt
- | B754_finite true _ _ _, B754_zero _ => Some Lt
- | B754_finite false _ _ _, B754_zero _ => Some Gt
- | B754_zero _, B754_finite true _ _ _ => Some Gt
- | B754_zero _, B754_finite false _ _ _ => Some Lt
- | B754_zero _, B754_zero _ => Some Eq
- | B754_finite s1 m1 e1 _, B754_finite s2 m2 e2 _ =>
- match s1, s2 with
- | true, false => Some Lt
- | false, true => Some Gt
- | false, false =>
- match Zcompare e1 e2 with
- | Lt => Some Lt
- | Gt => Some Gt
- | Eq => Some (Pcompare m1 m2 Eq)
- end
- | true, true =>
- match Zcompare e1 e2 with
- | Lt => Some Gt
- | Gt => Some Lt
- | Eq => Some (CompOpp (Pcompare m1 m2 Eq))
- end
- end
- end.
+(** Conversions *)
-Definition cmp (c:comparison) (f1 f2:float) : bool := (**r comparison *)
- match c with
- | Ceq =>
- match order_float f1 f2 with Some Eq => true | _ => false end
- | Cne =>
- match order_float f1 f2 with Some Eq => false | _ => true end
- | Clt =>
- match order_float f1 f2 with Some Lt => true | _ => false end
- | Cle =>
- match order_float f1 f2 with Some(Lt|Eq) => true | _ => false end
- | Cgt =>
- match order_float f1 f2 with Some Gt => true | _ => false end
- | Cge =>
- match order_float f1 f2 with Some(Gt|Eq) => true | _ => false end
- end.
+Definition of_single: float32 -> float := b64_of_b32 of_single_pl mode_NE.
+Definition to_single: float -> float32 := b32_of_b64 to_single_pl mode_NE.
+
+Definition to_int (f:float): option int := (**r conversion to signed 32-bit int *)
+ option_map Int.repr (b64_to_Z_range f Int.min_signed Int.max_signed).
+Definition to_intu (f:float): option int := (**r conversion to unsigned 32-bit int *)
+ option_map Int.repr (b64_to_Z_range f 0 Int.max_unsigned).
+Definition to_long (f:float): option int64 := (**r conversion to signed 64-bit int *)
+ option_map Int64.repr (b64_to_Z_range f Int64.min_signed Int64.max_signed).
+Definition to_longu (f:float): option int64 := (**r conversion to unsigned 64-bit int *)
+ option_map Int64.repr (b64_to_Z_range f 0 Int64.max_unsigned).
+
+Definition of_int (n:int): float := (**r conversion from signed 32-bit int *)
+ b64_of_Z (Int.signed n).
+Definition of_intu (n:int): float:= (**r conversion from unsigned 32-bit int *)
+ b64_of_Z (Int.unsigned n).
+
+Definition of_long (n:int64): float := (**r conversion from signed 64-bit int *)
+ b64_of_Z (Int64.signed n).
+Definition of_longu (n:int64): float:= (**r conversion from unsigned 64-bit int *)
+ b64_of_Z (Int64.unsigned n).
+
+Definition from_parsed (base:positive) (intPart:positive) (expPart:Z) : float :=
+ build_from_parsed 53 1024 __ __ base intPart expPart.
(** Conversions between floats and their concrete in-memory representation
- as a sequence of 64 bits (double precision) or 32 bits (single precision). *)
+ as a sequence of 64 bits. *)
-Definition bits_of_double (f: float): int64 := Int64.repr (bits_of_b64 f).
-Definition double_of_bits (b: int64): float := b64_of_bits (Int64.unsigned b).
+Definition to_bits (f: float): int64 := Int64.repr (bits_of_b64 f).
+Definition of_bits (b: int64): float := b64_of_bits (Int64.unsigned b).
-Definition bits_of_single (f: float) : int := Int.repr (bits_of_b32 (binary32offloat f)).
-Definition single_of_bits (b: int): float := floatofbinary32 (b32_of_bits (Int.unsigned b)).
+Definition from_words (hi lo: int) : float := of_bits (Int64.ofwords hi lo).
-Definition from_words (hi lo: int) : float := double_of_bits (Int64.ofwords hi lo).
+(** ** Properties *)
(** Below are the only properties of floating-point arithmetic that we
rely on in the compiler proof. *)
@@ -362,75 +283,37 @@ Ltac smart_omega :=
compute_this Int.min_signed; compute_this Int.max_signed;
compute_this Int64.modulus; compute_this Int64.half_modulus;
compute_this Int64.max_unsigned;
- compute_this (Zpower_pos 2 1024); compute_this (Zpower_pos 2 53); compute_this (Zpower_pos 2 52);
+ compute_this (Zpower_pos 2 1024); compute_this (Zpower_pos 2 53); compute_this (Zpower_pos 2 52); compute_this (Zpower_pos 2 32);
zify; omega.
-Lemma floatofbinary32_exact :
- forall f, is_finite_strict _ _ f = true ->
- is_finite_strict _ _ (floatofbinary32 f) = true /\ B2R _ _ f = B2R _ _ (floatofbinary32 f).
-Proof.
- destruct f as [ | | |s m e]; try discriminate; intro.
- pose proof (binary_normalize64_correct (cond_Zopp s (Zpos m)) e s).
- match goal with [H0:if Rlt_bool (Rabs ?x) _ then _ else _ |- _ /\ ?y = _] => assert (x=y)%R end.
- apply round_generic; [now apply valid_rnd_round_mode|].
- apply (generic_inclusion_ln_beta _ (FLT_exp (3 - 128 - 24) 24)).
- intro; eapply Zle_trans; [apply Zle_max_compat_l | apply Zle_max_compat_r]; omega.
- apply generic_format_canonic; apply canonic_canonic_mantissa; apply (proj1 (andb_prop _ _ e0)).
- rewrite H1, Rlt_bool_true in H0; intuition; unfold floatofbinary32, binary_normalize64.
- match goal with [ |- _ _ _ ?x = true ] => destruct x end; try discriminate.
- symmetry in H2; apply F2R_eq_0_reg in H2; destruct s; discriminate.
- reflexivity.
- eapply Rlt_trans.
- unfold B2R; rewrite <- F2R_Zabs, abs_cond_Zopp; eapply bounded_lt_emax; now apply e0.
- now apply bpow_lt.
-Qed.
-
-Lemma binary32offloatofbinary32_num :
- forall f, is_nan _ _ f = false ->
- binary32offloat (floatofbinary32 f) = f.
-Proof.
- intros f Hnan; pose proof (floatofbinary32_exact f); destruct f as [ | | |s m e]; try reflexivity.
- discriminate.
- specialize (H eq_refl); destruct H.
- destruct (floatofbinary32 (B754_finite 24 128 s m e e0)) as [ | | |s1 m1 e1]; try discriminate.
- unfold binary32offloat.
- pose proof (binary_normalize32_correct (cond_Zopp s1 (Zpos m1)) e1 s1).
- unfold B2R at 2 in H0; cbv iota zeta beta in H0; rewrite <- H0, round_generic in H1.
- rewrite Rlt_bool_true in H1.
- unfold binary_normalize32.
- apply B2R_inj; intuition; match goal with [|- _ _ _ ?f = true] => destruct f end; try discriminate.
- symmetry in H2; apply F2R_eq_0_reg in H2; destruct s; discriminate.
- reflexivity.
- unfold B2R; rewrite <- F2R_Zabs, abs_cond_Zopp; eapply bounded_lt_emax; apply e0.
- now apply valid_rnd_round_mode.
- now apply generic_format_B2R.
-Qed.
-
-Lemma floatofbinary32offloatofbinary32_pl:
+Lemma of_single_to_single_pl:
forall s pl,
- prod_rect (fun _ => _) floatofbinary32_pl (prod_rect (fun _ => _) binary32offloat_pl (floatofbinary32_pl s pl)) = floatofbinary32_pl s pl.
+ prod_rect (fun _ => _) of_single_pl (prod_rect (fun _ => _) to_single_pl (of_single_pl s pl)) = of_single_pl s pl.
Proof.
- destruct pl. unfold binary32offloat_pl, floatofbinary32_pl.
+ destruct pl. unfold of_single_pl, to_single_pl.
unfold transform_quiet_pl, proj1_sig. simpl.
f_equal. apply nan_payload_fequal.
unfold Pos.shiftr_nat. simpl.
rewrite !lor_idempotent. reflexivity.
Qed.
-Lemma floatofbinary32offloatofbinary32 :
- forall f, floatofbinary32 (binary32offloat (floatofbinary32 f)) = floatofbinary32 f.
+Lemma of_single_to_single_of_single:
+ forall f, of_single (to_single (of_single f)) = of_single f.
Proof.
- destruct f; try (rewrite binary32offloatofbinary32_num; tauto).
- unfold floatofbinary32, binary32offloat.
- rewrite <- floatofbinary32offloatofbinary32_pl at 2.
+ intros. unfold of_single, to_single, b64_of_b32, b32_of_b64.
+ destruct (is_nan _ _ f) eqn:ISNAN.
+- destruct f; try discriminate.
+ unfold Bconv.
+ rewrite <- of_single_to_single_pl at 2.
reflexivity.
+- rewrite (Bconv_narrow_widen 24); auto; omega.
Qed.
-Lemma binary32offloatofbinary32offloat_pl:
+Lemma to_single_of_single_pl:
forall s pl,
- prod_rect (fun _ => _) binary32offloat_pl (prod_rect (fun _ => _) floatofbinary32_pl (binary32offloat_pl s pl)) = binary32offloat_pl s pl.
+ prod_rect (fun _ => _) to_single_pl (prod_rect (fun _ => _) of_single_pl (to_single_pl s pl)) = to_single_pl s pl.
Proof.
- destruct pl. unfold binary32offloat_pl, floatofbinary32_pl. unfold prod_rect.
+ destruct pl. unfold of_single_pl, to_single_pl. unfold prod_rect.
f_equal. apply nan_payload_fequal.
rewrite transform_quiet_pl_idempotent.
unfold transform_quiet_pl, proj1_sig.
@@ -444,293 +327,132 @@ Proof.
rewrite !nat_iter_succ_r with (f:=Pos.div2). auto.
Qed.
-Lemma binary32offloatofbinary32offloat :
- forall f, binary32offloat (floatofbinary32 (binary32offloat f)) = binary32offloat f.
+Lemma to_single_of_single_to_single:
+ forall f, to_single (of_single (to_single f)) = to_single f.
Proof.
- destruct f; try (rewrite binary32offloatofbinary32_num; simpl; tauto).
- unfold floatofbinary32, binary32offloat.
- rewrite <- binary32offloatofbinary32offloat_pl at 2.
+ intros. unfold to_single, of_single, b32_of_b64, b64_of_b32.
+ destruct (is_nan _ _ f) eqn:ISNAN.
+- destruct f; try discriminate.
+ unfold Bconv.
+ rewrite <- to_single_of_single_pl at 2.
reflexivity.
- rewrite binary32offloatofbinary32_num; simpl. auto.
- unfold binary_normalize32.
- pose proof (binary_normalize32_correct (cond_Zopp b (Z.pos m)) e b).
- destruct binary_normalize; auto. simpl in H.
- destruct Rlt_bool in H. intuition.
- unfold binary_overflow in H. destruct n.
- destruct overflow_to_inf in H; discriminate.
+- rewrite (Bconv_narrow_widen 24); auto. omega. omega.
+ set (f' := Bconv 53 1024 24 128 __ __ to_single_pl mode_NE f).
+ destruct f; try discriminate; try reflexivity.
+ exploit (Bconv_correct 53 1024 24 128 __ __ to_single_pl mode_NE
+ (B754_finite 53 1024 b m e e0)). auto.
+ destruct Rlt_bool.
+ intros (A & B & C). apply is_finite_not_is_nan; auto.
+ fold f'. intros A. destruct f'; auto.
+ simpl in A. unfold binary_overflow in A.
+ destruct overflow_to_inf in A; destruct n; discriminate.
Qed.
-Theorem singleoffloat_idem:
- forall f, singleoffloat (singleoffloat f) = singleoffloat f.
-Proof.
- intros; unfold singleoffloat; rewrite binary32offloatofbinary32offloat; reflexivity.
-Qed.
+(** Commutativity properties of addition and multiplication. *)
-Theorem singleoflong_idem:
- forall n, singleoffloat (singleoflong n) = singleoflong n.
+Theorem add_commut:
+ forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> add x y = add y x.
Proof.
- intros; unfold singleoffloat, singleoflong. rewrite floatofbinary32offloatofbinary32; reflexivity.
+ intros. apply Bplus_commut.
+ destruct x, y; try reflexivity. simpl in H. intuition congruence.
Qed.
-Theorem singleoflongu_idem:
- forall n, singleoffloat (singleoflongu n) = singleoflongu n.
+Theorem mul_commut:
+ forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> mul x y = mul y x.
Proof.
- intros; unfold singleoffloat, singleoflongu. rewrite floatofbinary32offloatofbinary32; reflexivity.
+ intros. apply Bmult_commut.
+ destruct x, y; try reflexivity. simpl in H. intuition congruence.
Qed.
-Definition is_single (f: float) : Prop := exists s, f = floatofbinary32 s.
-
-Theorem singleoffloat_is_single:
- forall f, is_single (singleoffloat f).
-Proof.
- intros. exists (binary32offloat f); auto.
-Qed.
+(** Multiplication by 2 is diagonal addition. *)
-Theorem singleoffloat_of_single:
- forall f, is_single f -> singleoffloat f = f.
+Theorem mul2_add:
+ forall f, add f f = mul f (of_int (Int.repr 2%Z)).
Proof.
- intros. destruct H as [s EQ]. subst f. unfold singleoffloat.
- apply floatofbinary32offloatofbinary32.
+ intros. apply Bmult2_Bplus.
+ intros. destruct x; try discriminate. simpl.
+ transitivity (b, transform_quiet_pl n).
+ destruct Archi.choose_binop_pl_64; auto.
+ destruct y; auto || discriminate.
Qed.
-Theorem is_single_dec: forall f, {is_single f} + {~is_single f}.
-Proof.
- intros. case (eq_dec (singleoffloat f) f); intros.
- unfold singleoffloat in e. left. exists (binary32offloat f). auto.
- right; red; intros; elim n. apply singleoffloat_of_single; auto.
-Defined.
+(** Divisions that can be turned into multiplication by an inverse. *)
-(** Commutativity properties of addition and multiplication. *)
+Definition exact_inverse : float -> option float := b64_exact_inverse.
-Theorem add_commut:
- forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> add x y = add y x.
-Proof.
- intros x y NAN. unfold add, b64_plus.
- pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y).
- pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE y x).
- unfold Bplus in *; destruct x; destruct y; auto.
-- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB; auto. f_equal; apply eqb_prop; auto.
-- rewrite (eqb_sym b0 b). destruct (eqb b b0) eqn:EQB.
- f_equal; apply eqb_prop; auto.
- auto.
-- simpl in NAN; intuition congruence.
-- exploit H; auto. clear H. exploit H0; auto. clear H0.
- set (x := B754_finite 53 1024 b0 m0 e1 e2).
- set (rx := B2R 53 1024 x).
- set (y := B754_finite 53 1024 b m e e0).
- set (ry := B2R 53 1024 y).
- rewrite (Rplus_comm ry rx). destruct Rlt_bool.
- intros (A1 & A2 & A3) (B1 & B2 & B3).
- apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto.
- rewrite Z.add_comm. rewrite Z.min_comm. auto.
- intros (A1 & A2) (B1 & B2). apply B2FF_inj. rewrite B2 in B1. rewrite <- B1 in A1. auto.
-Qed.
-
-Theorem mul_commut:
- forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> mul x y = mul y x.
+Theorem div_mul_inverse:
+ forall x y z, exact_inverse y = Some z -> div x y = mul x z.
Proof.
- intros x y NAN. unfold mul, b64_mult.
- pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y).
- pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE y x).
- unfold Bmult in *; destruct x; destruct y; auto.
-- f_equal. apply xorb_comm.
-- f_equal. apply xorb_comm.
-- f_equal. apply xorb_comm.
-- f_equal. apply xorb_comm.
-- simpl in NAN. intuition congruence.
-- f_equal. apply xorb_comm.
-- f_equal. apply xorb_comm.
-- set (x := B754_finite 53 1024 b0 m0 e1 e2) in *.
- set (rx := B2R 53 1024 x) in *.
- set (y := B754_finite 53 1024 b m e e0) in *.
- set (ry := B2R 53 1024 y) in *.
- rewrite (Rmult_comm ry rx) in *. destruct Rlt_bool.
- destruct H as (A1 & A2 & A3); destruct H0 as (B1 & B2 & B3).
- apply B2R_Bsign_inj; auto. rewrite <- B1 in A1. auto.
- rewrite ! Bsign_FF2B. f_equal. f_equal. apply xorb_comm. apply Pos.mul_comm. apply Z.add_comm.
- apply B2FF_inj. etransitivity. eapply H. rewrite xorb_comm. auto.
+ intros. apply Bdiv_mult_inverse; auto.
+ intros. destruct x0; try discriminate. simpl.
+ transitivity (b, transform_quiet_pl n).
+ destruct y0; reflexivity || discriminate.
+ destruct z0; reflexivity || discriminate.
Qed.
(** Properties of comparisons. *)
-Theorem order_float_finite_correct:
- forall f1 f2, is_finite _ _ f1 = true -> is_finite _ _ f2 = true ->
- match order_float f1 f2 with
- | Some c => Rcompare (B2R _ _ f1) (B2R _ _ f2) = c
- | None => False
- end.
-Proof.
- Ltac apply_Rcompare :=
- match goal with
- | [ |- Rcompare _ _ = Lt ] => apply Rcompare_Lt
- | [ |- Rcompare _ _ = Eq ] => apply Rcompare_Eq
- | [ |- Rcompare _ _ = Gt ] => apply Rcompare_Gt
- end.
- unfold order_float; intros.
- destruct f1, f2; try discriminate; unfold B2R, F2R, Fnum, Fexp, cond_Zopp;
- try (replace 0%R with (Z2R 0 * bpow radix2 e)%R by (simpl Z2R; ring);
- rewrite Rcompare_mult_r by (apply bpow_gt_0); rewrite Rcompare_Z2R).
- apply_Rcompare; reflexivity.
- destruct b0; reflexivity.
- destruct b; reflexivity.
- clear H H0.
- apply andb_prop in e0; destruct e0; apply (canonic_canonic_mantissa _ _ false) in H.
- apply andb_prop in e2; destruct e2; apply (canonic_canonic_mantissa _ _ false) in H1.
- pose proof (Zcompare_spec e e1); unfold canonic, Fexp in H1, H.
- assert (forall m1 m2 e1 e2,
- let x := (Z2R (Zpos m1) * bpow radix2 e1)%R in
- let y := (Z2R (Zpos m2) * bpow radix2 e2)%R in
- canonic_exp radix2 (FLT_exp (3-1024-53) 53) x < canonic_exp radix2 (FLT_exp (3-1024-53) 53) y -> (x < y)%R).
- intros; apply Rnot_le_lt; intro; apply (ln_beta_le radix2) in H5.
- apply (fexp_monotone 53 1024) in H5; unfold canonic_exp in H4; omega.
- apply Rmult_gt_0_compat; [apply (Z2R_lt 0); reflexivity|now apply bpow_gt_0].
- assert (forall m1 m2 e1 e2, (Z2R (- Zpos m1) * bpow radix2 e1 < Z2R (Zpos m2) * bpow radix2 e2)%R).
- intros; apply (Rlt_trans _ 0%R).
- replace 0%R with (0*bpow radix2 e0)%R by ring; apply Rmult_lt_compat_r;
- [apply bpow_gt_0; reflexivity|now apply (Z2R_lt _ 0)].
- apply Rmult_gt_0_compat; [apply (Z2R_lt 0); reflexivity|now apply bpow_gt_0].
- destruct b, b0; try (now apply_Rcompare; apply H5); inversion H3;
- try (apply_Rcompare; apply H4; rewrite H, H1 in H7; assumption);
- try (apply_Rcompare; do 2 rewrite Z2R_opp, Ropp_mult_distr_l_reverse;
- apply Ropp_lt_contravar; apply H4; rewrite H, H1 in H7; assumption);
- rewrite H7, Rcompare_mult_r, Rcompare_Z2R by (apply bpow_gt_0); reflexivity.
-Qed.
-
Theorem cmp_swap:
- forall c x y, Float.cmp (swap_comparison c) x y = Float.cmp c y x.
+ forall c x y, cmp (swap_comparison c) x y = cmp c y x.
Proof.
- destruct c, x, y; simpl; try destruct b; try destruct b0; try reflexivity;
- rewrite <- (Zcompare_antisym e e1); destruct (e ?= e1); try reflexivity;
- change Eq with (CompOpp Eq); rewrite <- (Pcompare_antisym m m0 Eq);
- simpl; destruct (Pcompare m m0 Eq); reflexivity.
+ unfold cmp, b64_compare; intros. rewrite (Bcompare_swap _ _ x y).
+ apply cmp_of_comparison_swap.
Qed.
Theorem cmp_ne_eq:
forall f1 f2, cmp Cne f1 f2 = negb (cmp Ceq f1 f2).
Proof.
- unfold cmp; intros; destruct (order_float f1 f2) as [ [] | ]; reflexivity.
+ intros; apply cmp_of_comparison_ne_eq.
Qed.
Theorem cmp_lt_eq_false:
forall f1 f2, cmp Clt f1 f2 = true -> cmp Ceq f1 f2 = true -> False.
Proof.
- unfold cmp; intros; destruct (order_float f1 f2) as [ [] | ]; discriminate.
+ intros f1 f2; apply cmp_of_comparison_lt_eq_false.
Qed.
Theorem cmp_le_lt_eq:
forall f1 f2, cmp Cle f1 f2 = cmp Clt f1 f2 || cmp Ceq f1 f2.
Proof.
- unfold cmp; intros; destruct (order_float f1 f2) as [ [] | ]; reflexivity.
+ intros f1 f2; apply cmp_of_comparison_le_lt_eq.
Qed.
-Corollary cmp_gt_eq_false:
+Theorem cmp_gt_eq_false:
forall x y, cmp Cgt x y = true -> cmp Ceq x y = true -> False.
Proof.
- intros; rewrite <- cmp_swap in H; rewrite <- cmp_swap in H0;
- eapply cmp_lt_eq_false; now eauto.
+ intros f1 f2; apply cmp_of_comparison_gt_eq_false.
Qed.
-Corollary cmp_ge_gt_eq:
+Theorem cmp_ge_gt_eq:
forall f1 f2, cmp Cge f1 f2 = cmp Cgt f1 f2 || cmp Ceq f1 f2.
Proof.
- intros.
- change Cge with (swap_comparison Cle); change Cgt with (swap_comparison Clt);
- change Ceq with (swap_comparison Ceq).
- repeat rewrite cmp_swap.
- now apply cmp_le_lt_eq.
+ intros f1 f2; apply cmp_of_comparison_ge_gt_eq.
Qed.
Theorem cmp_lt_gt_false:
forall f1 f2, cmp Clt f1 f2 = true -> cmp Cgt f1 f2 = true -> False.
Proof.
- unfold cmp; intros; destruct (order_float f1 f2) as [ [] | ]; discriminate.
+ intros f1 f2; apply cmp_of_comparison_lt_gt_false.
Qed.
(** Properties of conversions to/from in-memory representation.
- The double-precision conversions are bijective (one-to-one).
- The single-precision conversions lose precision exactly
- as described by [singleoffloat] rounding. *)
+ The conversions are bijective (one-to-one). *)
-Theorem double_of_bits_of_double:
- forall f, double_of_bits (bits_of_double f) = f.
+Theorem of_to_bits:
+ forall f, of_bits (to_bits f) = f.
Proof.
- intros; unfold double_of_bits, bits_of_double, bits_of_b64, b64_of_bits.
+ intros; unfold of_bits, to_bits, bits_of_b64, b64_of_bits.
rewrite Int64.unsigned_repr, binary_float_of_bits_of_binary_float; [reflexivity|].
- destruct f.
- simpl; try destruct b; vm_compute; split; congruence.
- simpl; try destruct b; vm_compute; split; congruence.
- destruct n as [p Hp].
- simpl. rewrite Z.ltb_lt in Hp.
- apply Zlt_succ_le with (m:=52) in Hp.
- apply Zpower_le with (r:=radix2) in Hp.
- edestruct Fcore_digits.digits2_Pnat_correct.
- rewrite Zpower_nat_Z in H0.
- eapply Z.lt_le_trans in Hp; eauto.
- unfold join_bits; destruct b.
- compute_this ((2 ^ 11 + 2047) * 2 ^ 52). smart_omega.
- compute_this ((0 + 2047) * 2 ^ 52). smart_omega.
- unfold bits_of_binary_float, join_bits.
- destruct (andb_prop _ _ e0); apply Zle_bool_imp_le in H0; apply Zeq_bool_eq in H; unfold FLT_exp in H.
- match goal with [H:Zmax ?x ?y = e|-_] => pose proof (Zle_max_l x y); pose proof (Zle_max_r x y) end.
- rewrite H, Fcalc_digits.Z_of_nat_S_digits2_Pnat in *.
- lapply (Fcalc_digits.Zpower_gt_Zdigits radix2 53 (Zpos m)). intro.
- unfold radix2, radix_val, Zabs in H3.
- pose proof (Zle_bool_spec (2 ^ 52) (Zpos m)).
- assert (Zpos m > 0); [vm_compute; exact eq_refl|].
- compute_this (2^11); compute_this (2^(11-1)).
- inversion H4; fold (2^52) in *; destruct H6; destruct b; now smart_omega.
- change Fcalc_digits.radix2 with radix2 in H1; omega.
-Qed.
-
-Theorem single_of_bits_of_single:
- forall f, single_of_bits (bits_of_single f) = singleoffloat f.
-Proof.
- intros; unfold single_of_bits, bits_of_single, bits_of_b32, b32_of_bits.
- rewrite Int.unsigned_repr, binary_float_of_bits_of_binary_float; [reflexivity|].
- destruct (binary32offloat f).
- simpl; try destruct b; vm_compute; split; congruence.
- simpl; try destruct b; vm_compute; split; congruence.
- destruct n as [p Hp].
- simpl. rewrite Z.ltb_lt in Hp.
- apply Zlt_succ_le with (m:=23) in Hp.
- apply Zpower_le with (r:=radix2) in Hp.
- edestruct Fcore_digits.digits2_Pnat_correct.
- rewrite Zpower_nat_Z in H0.
- eapply Z.lt_le_trans in Hp; eauto.
- compute_this (radix2^23).
- unfold join_bits; destruct b.
- compute_this ((2 ^ 8 + 255) * 2 ^ 23). smart_omega.
- compute_this ((0 + 255) * 2 ^ 23). smart_omega.
- unfold bits_of_binary_float, join_bits.
- destruct (andb_prop _ _ e0); apply Zle_bool_imp_le in H0; apply Zeq_bool_eq in H.
- unfold FLT_exp in H.
- match goal with [H:Zmax ?x ?y = e|-_] => pose proof (Zle_max_l x y); pose proof (Zle_max_r x y) end.
- rewrite H, Fcalc_digits.Z_of_nat_S_digits2_Pnat in *.
- lapply (Fcalc_digits.Zpower_gt_Zdigits radix2 24 (Zpos m)). intro.
- unfold radix2, radix_val, Zabs in H3.
- pose proof (Zle_bool_spec (2 ^ 23) (Zpos m)).
- compute_this (2^23); compute_this (2^24); compute_this (2^8); compute_this (2^(8-1)).
- assert (Zpos m > 0); [exact eq_refl|].
- inversion H4; destruct b; now smart_omega.
- change Fcalc_digits.radix2 with radix2 in H1; omega.
+ generalize (bits_of_binary_float_range 52 11 __ __ f).
+ change (2^(52+11+1)) with (Int64.max_unsigned + 1). omega.
Qed.
-Theorem bits_of_singleoffloat:
- forall f, bits_of_single (singleoffloat f) = bits_of_single f.
+Theorem to_of_bits:
+ forall b, to_bits (of_bits b) = b.
Proof.
- intro; unfold singleoffloat, bits_of_single; rewrite binary32offloatofbinary32offloat; reflexivity.
-Qed.
-
-Theorem singleoffloat_of_bits:
- forall b, singleoffloat (single_of_bits b) = single_of_bits b.
-Proof.
- intro; unfold singleoffloat, single_of_bits; rewrite floatofbinary32offloatofbinary32; reflexivity.
-Qed.
-
-Theorem single_of_bits_is_single:
- forall b, is_single (single_of_bits b).
-Proof.
- intros. exists (b32_of_bits (Int.unsigned b)); auto.
+ intros; unfold of_bits, to_bits, bits_of_b64, b64_of_bits.
+ rewrite bits_of_binary_float_of_bits. apply Int64.repr_unsigned.
+ apply Int64.unsigned_range.
Qed.
(** Conversions between floats and unsigned ints can be defined
@@ -740,279 +462,103 @@ Qed.
Definition ox8000_0000 := Int.repr Int.half_modulus. (**r [0x8000_0000] *)
-Lemma round_exact:
- forall n, -2^53 < n < 2^53 ->
- round radix2 (FLT_exp (3 - 1024 - 53) 53)
- (round_mode mode_NE) (Z2R n) = Z2R n.
-Proof.
- intros; rewrite round_generic; [reflexivity|now apply valid_rnd_round_mode|].
- apply generic_format_FLT; exists (Float radix2 n 0).
- unfold F2R, Fnum, Fexp, bpow; rewrite Rmult_1_r; intuition.
- pose proof (Zabs_spec n); now smart_omega.
-Qed.
-
-Lemma binary_normalize64_exact:
- forall n, -2^53 < n < 2^53 ->
- B2R _ _ (binary_normalize64 n 0 false) = Z2R n /\
- is_finite _ _ (binary_normalize64 n 0 false) = true.
-Proof.
- intros; pose proof (binary_normalize64_correct n 0 false).
- unfold F2R, Fnum, Fexp, bpow in H0; rewrite Rmult_1_r, round_exact, Rlt_bool_true in H0; try now intuition.
- rewrite <- Z2R_abs; apply Z2R_lt; pose proof (Zabs_spec n); now smart_omega.
-Qed.
-
-Theorem floatofintu_floatofint_1:
+Theorem of_intu_of_int_1:
forall x,
Int.ltu x ox8000_0000 = true ->
- floatofintu x = floatofint x.
+ of_intu x = of_int x.
Proof.
- unfold floatofintu, floatofint, Int.signed, Int.ltu; intro.
+ unfold of_intu, of_int, Int.signed, Int.ltu; intro.
change (Int.unsigned ox8000_0000) with Int.half_modulus.
destruct (zlt (Int.unsigned x) Int.half_modulus); now intuition.
Qed.
-Theorem floatofintu_floatofint_2:
+Theorem of_intu_of_int_2:
forall x,
Int.ltu x ox8000_0000 = false ->
- floatofintu x = add (floatofint (Int.sub x ox8000_0000))
- (floatofintu ox8000_0000).
+ of_intu x = add (of_int (Int.sub x ox8000_0000)) (of_intu ox8000_0000).
Proof.
- unfold floatofintu, floatofint, Int.signed, Int.ltu, Int.sub; intros.
- pose proof (Int.unsigned_range x).
- compute_this (Int.unsigned ox8000_0000).
- destruct (zlt (Int.unsigned x) 2147483648); try discriminate.
- rewrite Int.unsigned_repr by smart_omega.
- destruct (zlt ((Int.unsigned x) - 2147483648) Int.half_modulus).
- unfold add, b64_plus.
- match goal with [|- _ = Bplus _ _ _ _ _ _ ?x ?y] =>
- pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y) end.
- do 2 rewrite (fun x H => proj1 (binary_normalize64_exact x H)) in H1 by smart_omega.
- do 2 rewrite (fun x H => proj2 (binary_normalize64_exact x H)) in H1 by smart_omega.
- rewrite <- Z2R_plus, round_exact in H1 by smart_omega.
- rewrite Rlt_bool_true in H1;
- replace (Int.unsigned x - 2147483648 + 2147483648) with (Int.unsigned x) in * by ring.
- apply B2R_inj.
- destruct (binary_normalize64_exact (Int.unsigned x)); [now smart_omega|].
- match goal with [|- _ _ _ ?f = _] => destruct f end; intuition.
- exfalso; simpl in H2; change 0%R with (Z2R 0) in H2; apply eq_Z2R in H2; omega.
- try (change (53 ?= 1024) with Lt in H1). (* for Coq 8.4 *)
- simpl Zcompare in *.
- match goal with [|- _ _ _ ?f = _] => destruct f end; intuition.
- exfalso; simpl in H0; change 0%R with (Z2R 0) in H0; apply eq_Z2R in H0; omega.
- rewrite (fun x H => proj1 (binary_normalize64_exact x H)) by smart_omega; now intuition.
- rewrite <- Z2R_Zpower, <- Z2R_abs by omega; apply Z2R_lt;
- pose proof (Zabs_spec (Int.unsigned x)); now smart_omega.
- exfalso; now smart_omega.
+ unfold add, b64_plus, of_intu, of_int, b64_of_Z; intros.
+ set (y := Int.sub x ox8000_0000).
+ pose proof (Int.unsigned_range x); pose proof (Int.signed_range y).
+ assert (Ry: integer_representable 53 1024 (Int.signed y)).
+ { apply integer_representable_n; auto; smart_omega. }
+ assert (R8: integer_representable 53 1024 (Int.unsigned ox8000_0000)).
+ { apply integer_representable_2p with (p := 31);auto; smart_omega. }
+ rewrite BofZ_plus by auto.
+ f_equal.
+ unfold Int.ltu in H. destruct zlt in H; try discriminate.
+ unfold y, Int.sub. rewrite Int.signed_repr. omega.
+ compute_this (Int.unsigned ox8000_0000); smart_omega.
Qed.
-Lemma Zoffloat_correct:
- forall f,
- match Zoffloat f with
- | Some n =>
- is_finite _ _ f = true /\
- Z2R n = round radix2 (FIX_exp 0) (round_mode mode_ZR) (B2R _ _ f)
- | None =>
- is_finite _ _ f = false
- end.
-Proof.
- destruct f; try now intuition.
- simpl B2R. rewrite round_0. now intuition. now apply valid_rnd_round_mode.
- destruct e. split. reflexivity.
- rewrite round_generic. symmetry. now apply Rmult_1_r.
- now apply valid_rnd_round_mode.
- apply generic_format_FIX. exists (Float radix2 (cond_Zopp b (Zpos m)) 0). split; reflexivity.
- split; [reflexivity|].
- rewrite round_generic, Z2R_mult, Z2R_Zpower_pos, <- bpow_powerRZ;
- [reflexivity|now apply valid_rnd_round_mode|apply generic_format_F2R; discriminate].
- rewrite (inbetween_float_ZR_sign _ _ _ ((Zpos m) / Zpower_pos radix2 p)
- (new_location (Zpower_pos radix2 p) (Zpos m mod Zpower_pos radix2 p) loc_Exact)).
- unfold B2R, F2R, Fnum, Fexp, canonic_exp, bpow, FIX_exp, Zoffloat, radix2, radix_val.
- pose proof (Rlt_bool_spec (Z2R (cond_Zopp b (Zpos m)) * / Z2R (Zpower_pos 2 p)) 0).
- inversion H; rewrite <- (Rmult_0_l (bpow radix2 (Zneg p))) in H1.
- apply Rmult_lt_reg_r in H1. apply (lt_Z2R _ 0) in H1.
- destruct b; [split; [|ring_simplify];reflexivity|discriminate].
- now apply bpow_gt_0.
- apply Rmult_le_reg_r in H1. apply (le_Z2R 0) in H1.
- destruct b; [destruct H1|split; [|ring_simplify]]; reflexivity.
- now apply (bpow_gt_0 radix2 (Zneg p)).
- unfold canonic_exp, FIX_exp; replace 0 with (Zneg p + Zpos p) by apply Zplus_opp_r.
- apply (inbetween_float_new_location radix2 _ _ _ _ (Zpos p)); [reflexivity|].
- apply inbetween_Exact; unfold B2R, F2R, Fnum, Fexp; destruct b.
- rewrite Rabs_left; [simpl; ring_simplify; reflexivity|].
- replace 0%R with (0*(bpow radix2 (Zneg p)))%R by ring; apply Rmult_gt_compat_r.
- now apply bpow_gt_0.
- apply (Z2R_lt _ 0); reflexivity.
- apply Rabs_right; replace 0%R with (0*(bpow radix2 (Zneg p)))%R by ring; apply Rgt_ge.
- apply Rmult_gt_compat_r; [now apply bpow_gt_0|apply (Z2R_lt 0); reflexivity].
-Qed.
-
-Theorem intoffloat_correct:
- forall f,
- match intoffloat f with
- | Some n =>
- is_finite _ _ f = true /\
- Z2R (Int.signed n) = round radix2 (FIX_exp 0) (round_mode mode_ZR) (B2R _ _ f)
- | None =>
- is_finite _ _ f = false \/
- (B2R _ _ f <= Z2R (Zpred Int.min_signed)\/
- Z2R (Zsucc Int.max_signed) <= B2R _ _ f)%R
- end.
-Proof.
- intro; pose proof (Zoffloat_correct f); unfold intoffloat; destruct (Zoffloat f).
- pose proof (Zle_bool_spec Int.min_signed z); pose proof (Zle_bool_spec z Int.max_signed).
- compute_this Int.min_signed; compute_this Int.max_signed; destruct H.
- inversion H0; [inversion H1|].
- rewrite <- (Int.signed_repr z) in H2 by smart_omega; split; assumption.
- right; right; eapply Rle_trans; [apply Z2R_le; apply Zlt_le_succ; now apply H6|].
- rewrite H2, round_ZR_pos.
- unfold round, scaled_mantissa, canonic_exp, FIX_exp, F2R, Fnum, Fexp; simpl bpow.
- do 2 rewrite Rmult_1_r; now apply Zfloor_lb.
- apply Rnot_lt_le; intro; apply Rlt_le in H7; apply (round_le radix2 (FIX_exp 0) (round_mode mode_ZR)) in H7;
- rewrite <- H2, round_0 in H7; [apply (le_Z2R _ 0) in H7; now smart_omega|now apply valid_rnd_round_mode].
- right; left; eapply Rle_trans; [|apply (Z2R_le z); simpl; omega].
- rewrite H2, round_ZR_neg.
- unfold round, scaled_mantissa, canonic_exp, FIX_exp, F2R, Fnum, Fexp; simpl bpow.
- do 2 rewrite Rmult_1_r; now apply Zceil_ub.
- apply Rnot_lt_le; intro; apply Rlt_le in H5; apply (round_le radix2 (FIX_exp 0) (round_mode mode_ZR)) in H5.
- rewrite <- H2, round_0 in H5; [apply (le_Z2R 0) in H5; omega|now apply valid_rnd_round_mode].
- left; assumption.
-Qed.
-
-Theorem intuoffloat_correct:
- forall f,
- match intuoffloat f with
- | Some n =>
- is_finite _ _ f = true /\
- Z2R (Int.unsigned n) = round radix2 (FIX_exp 0) (round_mode mode_ZR) (B2R _ _ f)
- | None =>
- is_finite _ _ f = false \/
- (B2R _ _ f <= -1 \/
- Z2R (Zsucc Int.max_unsigned) <= B2R _ _ f)%R
- end.
-Proof.
- intro; pose proof (Zoffloat_correct f); unfold intuoffloat; destruct (Zoffloat f).
- pose proof (Zle_bool_spec 0 z); pose proof (Zle_bool_spec z Int.max_unsigned).
- compute_this Int.max_unsigned; destruct H.
- inversion H0. inversion H1.
- rewrite <- (Int.unsigned_repr z) in H2 by smart_omega; split; assumption.
- right; right; eapply Rle_trans; [apply Z2R_le; apply Zlt_le_succ; now apply H6|].
- rewrite H2, round_ZR_pos.
- unfold round, scaled_mantissa, canonic_exp, FIX_exp, F2R, Fnum, Fexp; simpl bpow;
- do 2 rewrite Rmult_1_r; now apply Zfloor_lb.
- apply Rnot_lt_le; intro; apply Rlt_le in H7; eapply (round_le radix2 (FIX_exp 0) (round_mode mode_ZR)) in H7;
- rewrite <- H2, round_0 in H7; [apply (le_Z2R _ 0) in H7; now smart_omega|now apply valid_rnd_round_mode].
- right; left; eapply Rle_trans; [|change (-1)%R with (Z2R (-1)); apply (Z2R_le z); omega].
- rewrite H2, round_ZR_neg; unfold round, scaled_mantissa, canonic_exp, FIX_exp, F2R, Fnum, Fexp; simpl bpow.
- do 2 rewrite Rmult_1_r; now apply Zceil_ub.
- apply Rnot_lt_le; intro; apply Rlt_le in H5; apply (round_le radix2 (FIX_exp 0) (round_mode mode_ZR)) in H5;
- rewrite <- H2, round_0 in H5; [apply (le_Z2R 0) in H5; omega|now apply valid_rnd_round_mode].
- left; assumption.
-Qed.
-
-Lemma intuoffloat_interval:
- forall f n,
- intuoffloat f = Some n ->
- (-1 < B2R _ _ f < Z2R (Zsucc Int.max_unsigned))%R.
-Proof.
- intro; pose proof (intuoffloat_correct f); destruct (intuoffloat f); try discriminate; destruct H.
- destruct f; try discriminate; intros.
- simpl B2R; change 0%R with (Z2R 0); change (-1)%R with (Z2R (-1)); split; apply Z2R_lt; reflexivity.
- pose proof (Int.unsigned_range i).
- unfold round, scaled_mantissa, B2R, F2R, Fnum, Fexp in H0 |- *; simpl bpow in H0; do 2 rewrite Rmult_1_r in H0;
- apply eq_Z2R in H0.
- split; apply Rnot_le_lt; intro.
- rewrite Ztrunc_ceil in H0;
- [apply Zceil_le in H3; change (-1)%R with (Z2R (-1)) in H3; rewrite Zceil_Z2R in H3; omega|].
- eapply Rle_trans; [now apply H3|apply (Z2R_le (-1) 0); discriminate].
- rewrite Ztrunc_floor in H0; [apply Zfloor_le in H3; rewrite Zfloor_Z2R in H3; now smart_omega|].
- eapply Rle_trans; [|now apply H3]; apply (Z2R_le 0); discriminate.
-Qed.
-
-Theorem intuoffloat_intoffloat_1:
+Theorem to_intu_to_int_1:
forall x n,
- cmp Clt x (floatofintu ox8000_0000) = true ->
- intuoffloat x = Some n ->
- intoffloat x = Some n.
-Proof.
- intros; unfold cmp in H; pose proof (order_float_finite_correct x (floatofintu ox8000_0000)).
- destruct (order_float x (floatofintu ox8000_0000)); try destruct c; try discriminate.
- pose proof (intuoffloat_correct x); rewrite H0 in H2; destruct H2.
- specialize (H1 H2 eq_refl); pose proof (intoffloat_correct x); destruct (intoffloat x).
- f_equal; rewrite <- (proj2 H4) in H3; apply eq_Z2R in H3.
- pose proof (eq_refl (Int.repr (Int.unsigned n))); rewrite H3 in H5 at 1.
- rewrite Int.repr_signed, Int.repr_unsigned in H5; assumption.
- destruct H4; [rewrite H2 in H4; discriminate|].
- apply intuoffloat_interval in H0; exfalso; destruct H0, H4.
- eapply Rlt_le_trans in H0; [|now apply H4]; apply (lt_Z2R (-1)) in H0; discriminate.
- apply Rcompare_Lt_inv in H1; eapply Rle_lt_trans in H1; [|now apply H4].
- unfold floatofintu in H1; rewrite (fun x H => proj1 (binary_normalize64_exact x H)) in H1;
- [apply lt_Z2R in H1; discriminate|split; reflexivity].
-Qed.
-
-Lemma Zfloor_minus :
- forall x n, Zfloor(x-Z2R n) = Zfloor(x)-n.
-Proof.
- intros; apply Zfloor_imp; replace (Zfloor x - n + 1) with (Zfloor x + 1 - n) by ring; do 2 rewrite Z2R_minus.
- split;
- [apply Rplus_le_compat_r; now apply Zfloor_lb|
- apply Rplus_lt_compat_r; rewrite Z2R_plus; now apply Zfloor_ub].
-Qed.
-
-Theorem intuoffloat_intoffloat_2:
+ cmp Clt x (of_intu ox8000_0000) = true ->
+ to_intu x = Some n ->
+ to_int x = Some n.
+Proof.
+ intros. unfold to_intu in H0.
+ destruct (b64_to_Z_range x 0 Int.max_unsigned) as [p|] eqn:E; simpl in H0; inv H0.
+ unfold b64_to_Z_range in E. exploit ZofB_range_inversion; eauto. intros (A & B & C).
+ unfold to_int, b64_to_Z_range. unfold ZofB_range. rewrite C.
+ rewrite Zle_bool_true by smart_omega. rewrite Zle_bool_true; auto.
+ exploit (BofZ_exact 53 1024 __ __ (Int.unsigned ox8000_0000)).
+ vm_compute; intuition congruence.
+ set (y := of_intu ox8000_0000) in *.
+ change (BofZ 53 1024 eq_refl eq_refl (Int.unsigned ox8000_0000)) with y.
+ intros (EQy & FINy & SIGNy).
+ assert (FINx: is_finite _ _ x = true).
+ { rewrite ZofB_correct in C. destruct (is_finite _ _ x) eqn:FINx; congruence. }
+ destruct (zeq p 0).
+ subst p; smart_omega.
+ destruct (ZofB_range_pos 53 1024 __ __ x p C) as [P Q]. omega.
+ assert (CMP: b64_compare x y = Some Lt).
+ { unfold cmp, cmp_of_comparison in H. destruct (b64_compare x y) as [[]|]; auto; discriminate. }
+ unfold b64_compare in CMP. rewrite Bcompare_finite_correct in CMP by auto.
+ inv CMP. apply Rcompare_Lt_inv in H1. rewrite EQy in H1.
+ assert (p < Int.unsigned ox8000_0000).
+ { apply lt_Z2R. eapply Rle_lt_trans; eauto. }
+ change Int.max_signed with (Int.unsigned ox8000_0000 - 1). omega.
+Qed.
+
+Theorem to_intu_to_int_2:
forall x n,
- cmp Clt x (floatofintu ox8000_0000) = false ->
- intuoffloat x = Some n ->
- intoffloat (sub x (floatofintu ox8000_0000)) = Some (Int.sub n ox8000_0000).
-Proof.
- assert (B2R _ _ (floatofintu ox8000_0000) = Z2R (Int.unsigned ox8000_0000)).
- apply (fun x H => proj1 (binary_normalize64_exact x H)); split; reflexivity.
- intros; unfold cmp in H0; pose proof (order_float_finite_correct x (floatofintu ox8000_0000)).
- destruct (order_float x (floatofintu ox8000_0000)); try destruct c; try discriminate;
- pose proof (intuoffloat_correct x); rewrite H1 in H3; destruct H3; specialize (H2 H3 eq_refl).
- apply Rcompare_Eq_inv in H2; apply B2R_inj in H2.
- subst x; vm_compute in H1; injection H1; intro; subst n; vm_compute; reflexivity.
- destruct x; try discriminate H3;
- [rewrite H in H2; simpl B2R in H2; apply (eq_Z2R 0) in H2; discriminate|reflexivity].
- reflexivity.
- rewrite H in H2; apply Rcompare_Gt_inv in H2; pose proof (intuoffloat_interval _ _ H1).
- unfold sub, b64_minus.
- exploit (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x (floatofintu ox8000_0000)); [assumption|reflexivity|]; intro.
- rewrite H, round_generic in H6.
- match goal with [H6:if Rlt_bool ?x ?y then _ else _|-_] =>
- pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end.
- destruct H6 as [? []].
- match goal with [|- _ ?y = _] => pose proof (intoffloat_correct y); destruct (intoffloat y) end.
- destruct H10.
- f_equal; rewrite <- (Int.repr_signed i); unfold Int.sub; f_equal; apply eq_Z2R.
- rewrite Z2R_minus, H11, H4.
- unfold round, scaled_mantissa, F2R, Fexp, Fnum, round_mode; simpl bpow; repeat rewrite Rmult_1_r;
- rewrite <- Z2R_minus; f_equal.
- rewrite (Ztrunc_floor (B2R _ _ x)), <- Zfloor_minus, <- Ztrunc_floor;
- [f_equal; assumption|apply Rle_0_minus; left; assumption|].
- left; eapply Rlt_trans; [|now apply H2]; apply (Z2R_lt 0); reflexivity.
- try (change (0 ?= 53) with Lt in H6,H8). (* for Coq 8.4 *)
- try (change (53 ?= 1024) with Lt in H6,H8). (* for Coq 8.4 *)
- exfalso; simpl Zcompare in H6, H8; rewrite H6, H8 in H10.
- destruct H10 as [|[]]; [discriminate|..].
- eapply Rle_trans in H10; [|apply Rle_0_minus; left; assumption]; apply (le_Z2R 0) in H10; apply H10; reflexivity.
- eapply Rle_lt_trans in H10; [|apply Rplus_lt_compat_r; now apply (proj2 H5)].
- rewrite <- Z2R_opp, <- Z2R_plus in H10; apply lt_Z2R in H10; discriminate.
- exfalso; inversion H7; rewrite Rabs_right in H8.
- eapply Rle_lt_trans in H8. apply Rle_not_lt in H8; [assumption|apply (bpow_le _ 31); discriminate].
- change (bpow radix2 31) with (Z2R(Zsucc Int.max_unsigned - Int.unsigned ox8000_0000)); rewrite Z2R_minus.
- apply Rplus_lt_compat_r; exact (proj2 H5).
- apply Rle_ge; apply Rle_0_minus; left; assumption.
- now apply valid_rnd_round_mode.
- apply Fprop_Sterbenz.sterbenz_aux; [now apply fexp_monotone|now apply generic_format_B2R| |].
- rewrite <- H; now apply generic_format_B2R.
- destruct H5; split; left; assumption.
- now destruct H2.
+ cmp Clt x (of_intu ox8000_0000) = false ->
+ to_intu x = Some n ->
+ to_int (sub x (of_intu ox8000_0000)) = Some (Int.sub n ox8000_0000).
+Proof.
+ intros. unfold to_intu in H0.
+ destruct (b64_to_Z_range x 0 Int.max_unsigned) as [p|] eqn:E; simpl in H0; inv H0.
+ unfold b64_to_Z_range in E. exploit ZofB_range_inversion; eauto. intros (A & B & C).
+ exploit (BofZ_exact 53 1024 __ __ (Int.unsigned ox8000_0000)).
+ vm_compute; intuition congruence.
+ set (y := of_intu ox8000_0000) in *.
+ change (BofZ 53 1024 __ __ (Int.unsigned ox8000_0000)) with y.
+ intros (EQy & FINy & SIGNy).
+ assert (FINx: is_finite _ _ x = true).
+ { rewrite ZofB_correct in C. destruct (is_finite _ _ x) eqn:FINx; congruence. }
+ assert (GE: (B2R _ _ x >= Z2R (Int.unsigned ox8000_0000))%R).
+ { rewrite <- EQy. unfold cmp, cmp_of_comparison, b64_compare in H.
+ rewrite Bcompare_finite_correct in H by auto.
+ destruct (Rcompare (B2R 53 1024 x) (B2R 53 1024 y)) eqn:CMP.
+ apply Req_ge; apply Rcompare_Eq_inv; auto.
+ discriminate.
+ apply Rgt_ge; apply Rcompare_Gt_inv; auto.
+ }
+ assert (EQ: b64_to_Z_range (sub x y) Int.min_signed Int.max_signed = Some (p - Int.unsigned ox8000_0000)).
+ {
+ apply ZofB_range_minus. exact E.
+ compute_this (Int.unsigned ox8000_0000). smart_omega.
+ apply Rge_le; auto.
+ }
+ unfold to_int; rewrite EQ. simpl. f_equal. unfold Int.sub. f_equal. f_equal.
+ symmetry; apply Int.unsigned_repr. omega.
Qed.
(** Conversions from ints to floats can be defined as bitwise manipulations
over the in-memory representation. This is what the PowerPC port does.
The trick is that [from_words 0x4330_0000 x] is the float
- [2^52 + floatofintu x]. *)
+ [2^52 + of_intu x]. *)
Definition ox4330_0000 := Int.repr 1127219200. (**r [0x4330_0000] *)
@@ -1032,55 +578,42 @@ Qed.
Lemma from_words_value:
forall x,
- B2R _ _ (from_words ox4330_0000 x) =
- (bpow radix2 52 + Z2R (Int.unsigned x))%R /\
- is_finite _ _ (from_words ox4330_0000 x) = true.
+ B2R _ _ (from_words ox4330_0000 x) = (bpow radix2 52 + Z2R (Int.unsigned x))%R
+ /\ is_finite _ _ (from_words ox4330_0000 x) = true
+ /\ Bsign _ _ (from_words ox4330_0000 x) = false.
Proof.
- intros; unfold from_words, double_of_bits, b64_of_bits, binary_float_of_bits.
- rewrite B2R_FF2B. rewrite is_finite_FF2B.
+ intros; unfold from_words, of_bits, b64_of_bits, binary_float_of_bits.
+ rewrite B2R_FF2B, is_finite_FF2B, Bsign_FF2B.
unfold binary_float_of_bits_aux; rewrite split_bits_or; simpl; pose proof (Int.unsigned_range x).
destruct (Int.unsigned x + Zpower_pos 2 52) eqn:?.
exfalso; now smart_omega.
- simpl; rewrite <- Heqz; unfold F2R; simpl.
- rewrite <- (Z2R_plus 4503599627370496), Rmult_1_r.
- split; [f_equal; compute_this (Zpower_pos 2 52); ring | reflexivity].
- assert (Zneg p < 0) by reflexivity.
+ simpl; rewrite <- Heqz; unfold F2R; simpl. split; auto.
+ rewrite <- (Z2R_plus 4503599627370496), Rmult_1_r. f_equal. rewrite Zplus_comm. auto.
exfalso; now smart_omega.
Qed.
-Theorem floatofintu_from_words:
- forall x,
- floatofintu x =
- sub (from_words ox4330_0000 x) (from_words ox4330_0000 Int.zero).
+Lemma from_words_eq:
+ forall x, from_words ox4330_0000 x = BofZ 53 1024 __ __ (2^52 + Int.unsigned x).
Proof.
- intros; destruct (Int.eq_dec x Int.zero); [subst; vm_compute; reflexivity|].
- assert (Int.unsigned x <> 0).
- intro; destruct n; rewrite <- (Int.repr_unsigned x), H; reflexivity.
+ intros.
pose proof (Int.unsigned_range x).
- pose proof (binary_normalize64_exact (Int.unsigned x)). destruct H1; [smart_omega|].
- unfold floatofintu, sub, b64_minus.
- match goal with [|- _ = Bminus _ _ _ _ _ _ ?x ?y] =>
- pose proof (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y) end.
- apply (fun f x y => f x y) in H3; try apply (fun x => proj2 (from_words_value x)).
- do 2 rewrite (fun x => proj1 (from_words_value x)) in H3.
- rewrite Int.unsigned_zero in H3.
- replace (bpow radix2 52 + Z2R (Int.unsigned x) -
- (bpow radix2 52 + Z2R 0))%R with (Z2R (Int.unsigned x)) in H3 by (simpl; ring).
- rewrite round_exact in H3 by smart_omega.
- match goal with [H3:if Rlt_bool ?x ?y then _ else _ |- _] =>
- pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end; destruct H3 as [? []].
- try (change (53 ?= 1024) with Lt in H3,H5). (* for Coq 8.4 *)
- simpl Zcompare in *; apply B2R_inj;
- try match goal with [H':B2R _ _ ?f = _ , H'':is_finite _ _ ?f = true |- is_finite_strict _ _ ?f = true] =>
- destruct f; [
- simpl in H'; change 0%R with (Z2R 0) in H'; apply eq_Z2R in H'; now destruct (H (eq_sym H')) |
- discriminate H'' | discriminate H'' | reflexivity
- ]
- end.
- rewrite H3; assumption.
- inversion H4; change (bpow radix2 1024) with (Z2R (radix2 ^ 1024)) in H5; rewrite <- Z2R_abs in H5.
- apply le_Z2R in H5; pose proof (Zabs_spec (Int.unsigned x));
- exfalso; now smart_omega.
+ destruct (from_words_value x) as (A & B & C).
+ destruct (BofZ_exact 53 1024 __ __ (2^52 + Int.unsigned x)) as (D & E & F).
+ smart_omega.
+ apply B2R_Bsign_inj; auto.
+ rewrite A, D. rewrite Z2R_plus. auto.
+ rewrite C, F. symmetry. apply Zlt_bool_false. smart_omega.
+Qed.
+
+Theorem of_intu_from_words:
+ forall x,
+ of_intu x = sub (from_words ox4330_0000 x) (from_words ox4330_0000 Int.zero).
+Proof.
+ intros. pose proof (Int.unsigned_range x).
+ rewrite ! from_words_eq. unfold sub, b64_minus. rewrite BofZ_minus.
+ unfold of_intu, b64_of_Z. f_equal. rewrite Int.unsigned_zero. omega.
+ apply integer_representable_n; auto; smart_omega.
+ apply integer_representable_n; auto; rewrite Int.unsigned_zero; smart_omega.
Qed.
Lemma ox8000_0000_signed_unsigned:
@@ -1095,337 +628,19 @@ Proof.
apply Int.eqm_add; [now apply Int.eqm_refl|exists 1;reflexivity].
Qed.
-Theorem floatofint_from_words:
+Theorem of_int_from_words:
forall x,
- floatofint x =
- sub (from_words ox4330_0000 (Int.add x ox8000_0000))
- (from_words ox4330_0000 ox8000_0000).
-Proof.
-Local Transparent Int.repr Int64.repr.
- intros; destruct (Int.eq_dec x Int.zero); [subst; vm_compute; reflexivity|].
- assert (Int.signed x <> 0).
- intro; destruct n; rewrite <- (Int.repr_signed x), H; reflexivity.
- pose proof (Int.signed_range x).
- pose proof (binary_normalize64_exact (Int.signed x)); destruct H1; [now smart_omega|].
- unfold floatofint, sub, b64_minus.
- match goal with [|- _ = Bminus _ _ _ _ _ _ ?x ?y] =>
- pose proof (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE x y) end.
- apply (fun f x y => f x y) in H3; try apply (fun x => proj2 (from_words_value x)).
- do 2 rewrite (fun x => proj1 (from_words_value x)) in H3.
- replace (bpow radix2 52 + Z2R (Int.unsigned (Int.add x ox8000_0000)) -
- (bpow radix2 52 + Z2R (Int.unsigned ox8000_0000)))%R with (Z2R (Int.signed x)) in H3
- by (rewrite ox8000_0000_signed_unsigned; rewrite Z2R_plus; simpl; ring).
- rewrite round_exact in H3 by smart_omega.
- match goal with [H3:if Rlt_bool ?x ?y then _ else _ |- _] =>
- pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end; destruct H3 as [? []].
- try (change (0 ?= 53) with Lt in H3,H5). (* for Coq 8.4 *)
- try (change (53 ?= 1024) with Lt in H3,H5). (* for Coq 8.4 *)
- simpl Zcompare in *; apply B2R_inj;
- try match goal with [H':B2R _ _ ?f = _ , H'':is_finite _ _ ?f = true |- is_finite_strict _ _ ?f = true] =>
- destruct f; [
- simpl in H'; change 0%R with (Z2R 0) in H'; apply eq_Z2R in H'; now destruct (H (eq_sym H')) |
- discriminate H'' | discriminate H'' | reflexivity
- ]
- end.
- rewrite H3; assumption.
- inversion H4; unfold bpow in H5; rewrite <- Z2R_abs in H5;
- apply le_Z2R in H5; pose proof (Zabs_spec (Int.signed x)); exfalso; now smart_omega.
-Qed.
-
-(** Conversions from 32-bit integers to single-precision floats can
- be decomposed into a conversion to a double-precision float,
- followed by a [singleoffloat] normalization. No double rounding occurs. *)
-
-Lemma is_finite_strict_ge_1:
- forall (f: binary32),
- is_finite _ _ f = true ->
- (1 <= Rabs (B2R _ _ f))%R ->
- is_finite_strict _ _ f = true.
-Proof.
- intros. destruct f; auto. simpl in H0.
- change 0%R with (Z2R 0) in H0.
- change 1%R with (Z2R 1) in H0.
- rewrite <- Z2R_abs in H0.
- exploit le_Z2R; eauto.
-Qed.
-
-Lemma single_float_of_int:
- forall n,
- -2^53 < n < 2^53 ->
- singleoffloat (binary_normalize64 n 0 false) = floatofbinary32 (binary_normalize32 n 0 false).
-Proof.
- intros. unfold singleoffloat. f_equal.
- assert (EITHER: n = 0 \/ Z.abs n > 0) by (destruct n; compute; auto).
- destruct EITHER as [EQ|GT].
- subst n; reflexivity.
- exploit binary_normalize64_exact; eauto. intros [A B].
- destruct (binary_normalize64 n 0 false) as [ | | | s m e] eqn:B64; simpl in *.
-- assert (0 = n) by (apply eq_Z2R; auto). subst n. simpl in GT. omegaContradiction.
-- discriminate.
-- discriminate.
-- set (n1 := cond_Zopp s (Z.pos m)) in *.
- generalize (binary_normalize32_correct n1 e s).
- fold (binary_normalize32 n1 e s). intros C.
- generalize (binary_normalize32_correct n 0 false).
- fold (binary_normalize32 n 0 false). intros D.
- assert (A': @F2R radix2 {| Fnum := n; Fexp := 0 |} = Z2R n).
- { unfold F2R. apply Rmult_1_r. }
- rewrite A in C. rewrite A' in D.
- destruct (Rlt_bool
- (Rabs
- (round radix2 (FLT_exp (3 - 128 - 24) 24) (round_mode mode_NE)
- (Z2R n))) (bpow radix2 128)).
-+ destruct C as [C1 [C2 _]]; destruct D as [D1 [D2 _]].
- assert (1 <= Rabs (round radix2 (FLT_exp (3 - 128 - 24) 24) (round_mode mode_NE) (Z2R n)))%R.
- { apply abs_round_ge_generic.
- apply fexp_correct. red. omega.
- apply valid_rnd_round_mode.
- apply generic_format_bpow with (e := 0). compute. congruence.
- rewrite <- Z2R_abs. change 1%R with (Z2R 1). apply Z2R_le. omega. }
- apply B2R_inj.
- apply is_finite_strict_ge_1; auto. rewrite C1; auto.
- apply is_finite_strict_ge_1; auto. rewrite D1; auto.
- congruence.
-+ apply B2FF_inj. congruence.
-Qed.
-
-Theorem singleofint_floatofint:
- forall n, singleofint n = singleoffloat (floatofint n).
-Proof.
- intros. symmetry. apply single_float_of_int.
- generalize (Int.signed_range n). smart_omega.
-Qed.
-
-Theorem singleofintu_floatofintu:
- forall n, singleofintu n = singleoffloat (floatofintu n).
-Proof.
- intros. symmetry. apply single_float_of_int.
- generalize (Int.unsigned_range n). smart_omega.
-Qed.
-
-Theorem mul2_add:
- forall f, add f f = mul f (floatofint (Int.repr 2%Z)).
-Proof.
- intros. unfold add, b64_plus, mul, b64_mult.
- destruct (is_finite_strict _ _ f) eqn:EQFINST.
- - assert (EQFIN:is_finite _ _ f = true) by (destruct f; simpl in *; congruence).
- pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE f f EQFIN EQFIN).
- pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE f
- (floatofint (Int.repr 2%Z))).
- rewrite <- double, Rmult_comm in H.
- replace (B2R 53 1024 (floatofint (Int.repr 2))) with 2%R in H0 by (compute; field).
- destruct Rlt_bool.
- + destruct H0 as [? []], H as [? []].
- rewrite EQFIN in H1.
- apply B2R_Bsign_inj; auto.
- etransitivity. apply H. symmetry. apply H0.
- etransitivity. apply H4. symmetry. etransitivity. apply H2.
- destruct Bmult; try reflexivity; discriminate.
- simpl. rewrite xorb_false_r.
- erewrite <- Rmult_0_l, Rcompare_mult_r.
- destruct f; try discriminate EQFINST.
- simpl. unfold F2R.
- erewrite <- Rmult_0_l, Rcompare_mult_r.
- rewrite Rcompare_Z2R with (y:=0).
- destruct b; reflexivity.
- apply bpow_gt_0.
- apply (Z2R_lt 0 2). omega.
- + destruct H.
- apply B2FF_inj.
- etransitivity. apply H.
- symmetry. etransitivity. apply H0.
- f_equal. destruct Bsign; reflexivity.
- - destruct f as [[]|[]| |]; try discriminate; simpl.
- auto. auto. auto. auto.
- destruct (Archi.choose_binop_pl b n b n); auto.
-Qed.
-
-Program Definition pow2_float (b:bool) (e:Z) (H:-1023 < e < 1023) : float :=
- B754_finite _ _ b (nat_iter 52 xO xH) (e-52) _.
-Next Obligation.
- unfold Fappli_IEEE.bounded, canonic_mantissa.
- rewrite andb_true_iff, Zle_bool_true by omega. split; auto.
- apply Zeq_bool_true. unfold FLT_exp. simpl Z.of_nat.
- apply Z.max_case_strong; omega.
-Qed.
-
-Theorem mul_div_pow2:
- forall b e f H H',
- mul f (pow2_float b e H) = div f (pow2_float b (-e) H').
-Proof.
- intros. unfold mul, b64_mult, div, b64_div.
- pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE f (pow2_float b e H)).
- pose proof (Bdiv_correct 53 1024 eq_refl eq_refl binop_pl mode_NE f (pow2_float b (-e) H')).
- lapply H1. clear H1. intro.
- change (is_finite 53 1024 (pow2_float b e H)) with true in H0.
- unfold Rdiv in H1.
- replace (/ B2R 53 1024 (pow2_float b (-e) H'))%R
- with (B2R 53 1024 (pow2_float b e H)) in H1.
- destruct (is_finite _ _ f) eqn:EQFIN.
- - destruct Rlt_bool.
- + destruct H0 as [? []], H1 as [? []].
- apply B2R_Bsign_inj; auto.
- etransitivity. apply H0. symmetry. apply H1.
- etransitivity. apply H3. destruct Bmult; try discriminate H2; reflexivity.
- symmetry. etransitivity. apply H5. destruct Bdiv; try discriminate H4; reflexivity.
- reflexivity.
- + apply B2FF_inj.
- etransitivity. apply H0. symmetry. etransitivity. apply H1.
- reflexivity.
- - destruct f; try discriminate EQFIN; auto.
- - simpl.
- assert ((4503599627370496 * bpow radix2 (e - 52))%R =
- (/ (4503599627370496 * bpow radix2 (- e - 52)))%R).
- { etransitivity. symmetry. apply (bpow_plus radix2 52).
- symmetry. etransitivity. apply f_equal. symmetry. apply (bpow_plus radix2 52).
- rewrite <- bpow_opp. f_equal. ring. }
- destruct b. unfold cond_Zopp.
- rewrite !F2R_Zopp, <- Ropp_inv_permute. f_equal. auto.
- intro. apply F2R_eq_0_reg in H3. omega.
- apply H2.
- - simpl. intro. apply F2R_eq_0_reg in H2.
- destruct b; simpl in H2; omega.
-Qed.
-
-Definition exact_inverse_mantissa := nat_iter 52 xO xH.
-
-Program Definition exact_inverse (f: float) : option float :=
- match f with
- | B754_finite s m e B =>
- if peq m exact_inverse_mantissa then
- if zlt (-1023) (e + 52) then
- if zlt (e + 52) 1023 then
- Some(B754_finite _ _ s m (-e - 104) _)
- else None else None else None
- | _ => None
- end.
-Next Obligation.
- unfold Fappli_IEEE.bounded, canonic_mantissa. apply andb_true_iff; split.
- simpl Z.of_nat. apply Zeq_bool_true. unfold FLT_exp. apply Z.max_case_strong; omega.
- apply Zle_bool_true. omega.
-Qed.
-
-Remark B754_finite_eq:
- forall s1 m1 e1 B1 s2 m2 e2 B2,
- s1 = s2 -> m1 = m2 -> e1 = e2 ->
- B754_finite _ _ s1 m1 e1 B1 = (B754_finite _ _ s2 m2 e2 B2 : float).
+ of_int x = sub (from_words ox4330_0000 (Int.add x ox8000_0000))
+ (from_words ox4330_0000 ox8000_0000).
Proof.
- intros. subst. f_equal. apply proof_irrelevance.
-Qed.
-
-Theorem div_mul_inverse:
- forall x y z, exact_inverse y = Some z -> div x y = mul x z.
-Proof with (try discriminate).
- unfold exact_inverse; intros. destruct y...
- destruct (peq m exact_inverse_mantissa)...
- destruct (zlt (-1023) (e + 52))...
- destruct (zlt (e + 52) 1023)...
- inv H.
- set (n := - e - 52).
- assert (RNG1: -1023 < n < 1023) by (unfold n; omega).
- assert (RNG2: -1023 < -n < 1023) by (unfold n; omega).
- symmetry.
- transitivity (mul x (pow2_float b n RNG1)).
- f_equal. apply B754_finite_eq; auto. unfold n; omega.
- transitivity (div x (pow2_float b (-n) RNG2)).
- apply mul_div_pow2.
- f_equal. apply B754_finite_eq; auto. unfold n; omega.
-Qed.
-
-Theorem floatoflongu_decomp:
- forall l, floatoflongu l =
- add (mul (floatofintu (Int64.hiword l)) (pow2_float false 32 (conj eq_refl eq_refl)))
- (floatofintu (Int64.loword l)).
-Proof.
- intros.
- unfold floatofintu.
- pose proof (Int.unsigned_range (Int64.loword l)).
- pose proof (Int.unsigned_range (Int64.hiword l)).
- pose proof (Int64.unsigned_range l).
- compute_this Int.modulus.
- destruct (binary_normalize64_exact (Int.unsigned (Int64.loword l)));
- [compute_this (2 ^ 53); omega|].
- destruct (binary_normalize64_exact (Int.unsigned (Int64.hiword l)));
- [compute_this (2 ^ 53); omega|].
- unfold mul, b64_mult.
- pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
- (binary_normalize64 (Int.unsigned (Int64.hiword l)) 0 false)
- (pow2_float false 32 (conj eq_refl eq_refl))).
- rewrite H4 in H6.
- replace (B2R 53 1024 (pow2_float false 32 (conj eq_refl eq_refl)))
- with (Z2R 4294967296)%R in H6 by (compute; field).
- rewrite <- Z2R_mult in H6.
- rewrite round_generic in H6.
- - rewrite Rlt_bool_true in H6.
- + rewrite H5 in H6.
- destruct H6 as [? [? ?]].
- { unfold add, b64_plus.
- pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
- (Bmult 53 1024 eq_refl eq_refl binop_pl mode_NE
- (binary_normalize64 (Int.unsigned (Int64.hiword l)) 0 false)
- (pow2_float false 32 (conj eq_refl eq_refl)))
- (binary_normalize64 (Int.unsigned (Int64.loword l)) 0 false) H7 H3).
- rewrite H6, H2, <- Z2R_plus in H9.
- change 4294967296 with (two_p 32) in H9.
- rewrite <- Int64.ofwords_add', Int64.ofwords_recompose in H9.
- assert (Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
- (round_mode mode_NE) (Z2R (Int64.unsigned l))) <
- bpow radix2 1024)%R.
- { rewrite <- round_NE_abs by (apply fexp_correct; reflexivity).
- eapply Rle_lt_trans with (Z2R (two_p 64)). 2:apply Z2R_lt; reflexivity.
- erewrite <- round_generic.
- - apply round_le. apply fexp_correct; reflexivity.
- apply (valid_rnd_round_mode mode_NE).
- rewrite <- Z2R_abs. apply Z2R_le. change (two_p 64) with Int64.modulus. zify; omega.
- - apply (valid_rnd_round_mode mode_NE).
- - apply (generic_format_bpow radix2 _ 64). compute. discriminate. }
- rewrite Rlt_bool_true in H9 by auto.
- unfold floatoflongu, binary_normalize64.
- pose proof (binary_normalize64_correct (Int64.unsigned l) 0 false).
- replace (F2R (beta:=radix2) {| Fnum := Int64.unsigned l; Fexp := 0 |})
- with (Z2R (Int64.unsigned l)) in H11
- by (unfold F2R, Fexp, Fnum, bpow; field).
- rewrite Rlt_bool_true in H11 by auto.
- destruct (Int64.eq_dec l Int64.zero). subst. reflexivity.
- destruct H11, H9.
- assert (1 <= round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE)
- (Z2R (Int64.unsigned l)))%R.
- { erewrite <- round_generic with (x:=1%R).
- apply round_le. apply fexp_correct. reflexivity. apply valid_rnd_round_mode.
- assert (Int64.unsigned l <> 0).
- { contradict n. rewrite <- (Int64.repr_unsigned l), n. auto. }
- apply (Z2R_le 1). omega.
- apply valid_rnd_round_mode.
- apply (generic_format_bpow _ _ 0). compute. discriminate. }
- unfold binary_normalize64 in *.
- apply B2R_inj.
- + destruct H12, (binary_normalize 53 1024 eq_refl eq_refl mode_NE (Int64.unsigned l) 0 false); try discriminate.
- unfold B2R in H11. rewrite <- H11 in H14. apply (le_Z2R 1 0) in H14. omega.
- auto.
- + destruct H13; match goal with Hf0:is_finite _ _ ?f0 = true,
- Hf1:B2R _ _ ?f1 = _ |-
- is_finite_strict _ _ ?f = true =>
- change f0 with f in Hf0; change f1 with f in Hf1;
- destruct f
- end; try discriminate.
- unfold B2R in H9. rewrite <- H9 in H14. apply (le_Z2R 1 0) in H14. omega.
- auto.
- + rewrite H11. symmetry. apply H9. }
- + rewrite <- Z2R_abs.
- apply (Z2R_lt _ (radix2 ^ 1024)).
- compute_this (radix2 ^ 1024); zify; omega.
- - apply valid_rnd_round_mode.
- - destruct (Z.eq_dec (Int.unsigned (Int64.hiword l)) 0).
- rewrite e. apply generic_format_0.
- apply generic_format_FLT_FLX.
- + apply Rle_trans with (bpow radix2 0). apply bpow_le. omega.
- rewrite <- Z2R_abs. apply (Z2R_le 1).
- clear - n. zify; omega.
- + apply generic_format_FLX.
- eexists {| Fnum := Int.unsigned (Int64.hiword l); Fexp := 32 |}.
- unfold F2R, Fnum, Fexp. split.
- rewrite Z2R_mult. auto.
- compute_this (radix2 ^ 53). zify; omega.
+ intros.
+ pose proof (Int.signed_range x).
+ rewrite ! from_words_eq. rewrite ox8000_0000_signed_unsigned.
+ change (Int.unsigned ox8000_0000) with Int.half_modulus.
+ unfold sub, b64_minus. rewrite BofZ_minus.
+ unfold of_int, b64_of_Z. f_equal. omega.
+ apply integer_representable_n; auto; smart_omega.
+ apply integer_representable_n; auto; smart_omega.
Qed.
Definition ox4530_0000 := Int.repr 1160773632. (**r [0x4530_0000] *)
@@ -1446,359 +661,694 @@ Qed.
Lemma from_words_value':
forall x,
- B2R _ _ (from_words ox4530_0000 x) =
- (bpow radix2 84 + Z2R (Int.unsigned x * two_p 32))%R /\
- is_finite _ _ (from_words ox4530_0000 x) = true.
+ B2R _ _ (from_words ox4530_0000 x) = (bpow radix2 84 + Z2R (Int.unsigned x * two_p 32))%R
+ /\ is_finite _ _ (from_words ox4530_0000 x) = true
+ /\ Bsign _ _ (from_words ox4530_0000 x) = false.
Proof.
- intros; unfold from_words, double_of_bits, b64_of_bits, binary_float_of_bits.
- rewrite B2R_FF2B. rewrite is_finite_FF2B.
+ intros; unfold from_words, of_bits, b64_of_bits, binary_float_of_bits.
+ rewrite B2R_FF2B, is_finite_FF2B, Bsign_FF2B.
unfold binary_float_of_bits_aux; rewrite split_bits_or'; simpl; pose proof (Int.unsigned_range x).
destruct (Int.unsigned x + Zpower_pos 2 52) eqn:?.
exfalso; now smart_omega.
- simpl; rewrite <- Heqz; unfold F2R; simpl.
+ simpl; rewrite <- Heqz; unfold F2R; simpl. split; auto.
rewrite <- (Z2R_plus 19342813113834066795298816), <- (Z2R_mult _ 4294967296).
- split; [f_equal; compute_this (Zpower_pos 2 52);
- compute_this (two_power_pos 32); ring | reflexivity].
+ f_equal; compute_this (Zpower_pos 2 52); compute_this (two_power_pos 32); ring.
assert (Zneg p < 0) by reflexivity.
exfalso; now smart_omega.
Qed.
-Theorem floatoflongu_from_words:
+Lemma from_words_eq':
+ forall x, from_words ox4530_0000 x = BofZ 53 1024 __ __ (2^84 + Int.unsigned x * 2^32).
+Proof.
+ intros.
+ pose proof (Int.unsigned_range x).
+ destruct (from_words_value' x) as (A & B & C).
+ destruct (BofZ_representable 53 1024 __ __ (2^84 + Int.unsigned x * 2^32)) as (D & E & F).
+ replace (2^84 + Int.unsigned x * 2^32)
+ with ((2^52 + Int.unsigned x) * 2^32) by ring.
+ apply integer_representable_n2p; auto. smart_omega. omega. omega.
+ apply B2R_Bsign_inj; auto.
+ rewrite A, D. rewrite <- Z2R_Zpower by omega. rewrite <- Z2R_plus. auto.
+ rewrite C, F. symmetry. apply Zlt_bool_false.
+ compute_this (2^84); compute_this (2^32); omega.
+Qed.
+
+Theorem of_longu_from_words:
forall l,
- floatoflongu l =
+ of_longu l =
add (sub (from_words ox4530_0000 (Int64.hiword l))
(from_words ox4530_0000 (Int.repr (two_p 20))))
(from_words ox4330_0000 (Int64.loword l)).
Proof.
intros.
- pose proof (Int64.unsigned_range l).
+ pose proof (Int64.unsigned_range l).
pose proof (Int.unsigned_range (Int64.hiword l)).
- destruct (from_words_value (Int64.loword l)).
- destruct (from_words_value' (Int64.hiword l)).
- destruct (from_words_value' (Int.repr (two_p 20))).
- unfold sub, b64_minus.
- pose proof (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
- (from_words ox4530_0000 (Int64.hiword l))
- (from_words ox4530_0000 (Int.repr (two_p 20))) H4 H6).
- rewrite round_generic in H7.
- - rewrite H3, H5 in H7.
- replace (bpow radix2 84 + Z2R (Int.unsigned (Int64.hiword l) * two_p 32) -
- (bpow radix2 84 + Z2R (Int.unsigned (Int.repr (two_p 20)) * two_p 32)))%R
- with (Z2R (Int.unsigned (Int64.hiword l) * two_p 32 - two_p 52)) in H7.
- + rewrite Rlt_bool_true in H7.
- * { destruct H7 as [? []].
- unfold floatoflongu, binary_normalize64.
- pose proof (binary_normalize64_correct (Int64.unsigned l) 0 false).
- replace (F2R (beta:=radix2) {| Fnum := Int64.unsigned l; Fexp := 0 |})
- with (Z2R (Int64.unsigned l)) in H10
- by (unfold F2R, Fexp, Fnum, bpow; field).
- assert (Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
- (round_mode mode_NE) (Z2R (Int64.unsigned l))) <
- bpow radix2 1024)%R.
- { rewrite <- round_NE_abs by (apply fexp_correct; reflexivity).
- eapply Rle_lt_trans with (Z2R (two_p 64)). 2:apply Z2R_lt; reflexivity.
- erewrite <- round_generic.
- - apply round_le. apply fexp_correct; reflexivity.
- apply (valid_rnd_round_mode mode_NE).
- rewrite <- Z2R_abs. apply Z2R_le. change (two_p 64) with Int64.modulus. zify; omega.
- - apply (valid_rnd_round_mode mode_NE).
- - apply (generic_format_bpow radix2 _ 64). compute. discriminate. }
- rewrite Rlt_bool_true in H10 by auto.
- unfold add, b64_plus.
- pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
- (Bminus 53 1024 eq_refl eq_refl binop_pl mode_NE
- (from_words ox4530_0000 (Int64.hiword l))
- (from_words ox4530_0000 (Int.repr (two_p 20))))
- (from_words ox4330_0000 (Int64.loword l)) H8 H2).
- change (bpow radix2 52) with (Z2R (two_p 52)) in H1.
- rewrite H7, H1, <- !Z2R_plus in H12.
- replace (Int.unsigned (Int64.hiword l) * two_p 32 - two_p 52 +
- (two_p 52 + Int.unsigned (Int64.loword l)))
- with (Int.unsigned (Int64.hiword l) * two_p 32 + Int.unsigned (Int64.loword l))
- in H12 by ring.
- rewrite <- Int64.ofwords_add', Int64.ofwords_recompose, Rlt_bool_true in H12 by auto.
- destruct (Z.eq_dec (Int64.unsigned l) 0).
- - apply (f_equal Int64.repr) in e. rewrite Int64.repr_unsigned in e.
- subst. reflexivity.
- - destruct H12 as [? []], H10 as [? []].
- assert (1 <= Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
- (round_mode mode_NE) (Z2R (Int64.unsigned l)))) %R.
- { rewrite <- round_NE_abs, <- Z2R_abs by (apply fexp_correct; reflexivity).
- erewrite <- round_generic with (x := 1%R).
- 3:eapply (generic_format_bpow _ _ 0).
- apply round_le, (Z2R_le 1).
- apply fexp_correct; reflexivity. apply (valid_rnd_round_mode mode_NE).
- zify; omega.
- apply (valid_rnd_round_mode mode_NE).
- compute; discriminate. }
- eapply B2R_inj.
- + destruct binary_normalize; try discriminate H15.
- unfold B2R in H10. rewrite <- H10, Rabs_R0 in H17. apply (le_Z2R 1 0) in H17. omega.
- auto.
- + match goal with Hf0:is_finite _ _ ?f0 = true,
- Hf1:B2R _ _ ?f1 = _ |-
- is_finite_strict _ _ ?f = true =>
- change f0 with f in Hf0; change f1 with f in Hf1;
- destruct f
- end; try discriminate H13.
- unfold B2R in H12. rewrite <- H12, Rabs_R0 in H17. apply (le_Z2R 1 0) in H17. omega.
- auto.
- + etransitivity; eauto. }
- * rewrite <- Z2R_abs. apply (Z2R_lt _ (2^1024)).
- compute_this Int.modulus; compute_this (two_p 32);
- compute_this (two_p 52); compute_this (2^1024).
- clear - H0. zify; omega.
- + rewrite Z2R_minus, Int.unsigned_repr, <- two_p_is_exp, !Z2R_mult.
- ring_simplify. reflexivity.
- omega. omega. compute; split; discriminate.
- - apply valid_rnd_round_mode.
- - apply sterbenz.
- + apply FLT_exp_monotone.
- + apply generic_format_B2R.
- + apply generic_format_B2R.
- + rewrite H3, H5, Int.unsigned_repr by (compute; split; discriminate).
- unfold bpow. rewrite <- !Z2R_plus, <- (Z2R_mult 2).
- compute_this (Z.pow_pos radix2 84);
- compute_this (two_p 20 * two_p 32); compute_this (two_p 32);
- compute_this (Int.modulus).
- change (19342813113834066795298816 + 4503599627370496)
- with (9671406559168833211334656 * 2).
- unfold Rdiv. rewrite Z2R_mult, Rmult_assoc, Rinv_r, Rmult_1_r by (apply (Z2R_neq 2 0); omega).
- split; apply Z2R_le; omega.
-Qed.
-
-Theorem floatoflong_decomp:
- forall l, floatoflong l =
- add (mul (floatofint (Int64.hiword l)) (pow2_float false 32 (conj eq_refl eq_refl)))
- (floatofintu (Int64.loword l)).
-Proof.
- intros.
- unfold floatofintu, floatofint.
- destruct (binary_normalize64_exact (Int.signed (Int64.hiword l))).
- { pose proof (Int.signed_range (Int64.hiword l)).
- revert H. generalize (Int.signed (Int64.hiword l)).
- change (forall z : Z, -2147483648 <= z <= 2147483647 -> - 9007199254740992 < z < 9007199254740992).
- intros. omega. }
- destruct (binary_normalize64_exact (Int.unsigned (Int64.loword l))).
- { pose proof (Int.unsigned_range (Int64.loword l)).
- revert H1. generalize (Int.unsigned (Int64.loword l)).
- change (forall z : Z, 0 <= z < 4294967296 -> - 9007199254740992 < z < 9007199254740992).
- intros. omega. }
- unfold mul, b64_mult.
- pose proof (Bmult_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
- (binary_normalize64 (Int.signed (Int64.hiword l)) 0 false)
- (pow2_float false 32 (conj eq_refl eq_refl))).
- rewrite H in H3.
- remember (B2R 53 1024 (pow2_float false 32 (conj eq_refl eq_refl))).
- compute in Heqr.
- change 4503599627370496%R with (Z2R (1048576*4294967296)) in Heqr.
- change 1048576%R with (Z2R 1048576) in Heqr.
- rewrite Z2R_mult in Heqr.
- assert (r = Z2R 4294967296).
- { rewrite Heqr. field. change 0%R with (Z2R 0). intro. apply eq_Z2R in H4. discriminate. }
- clear Heqr. subst.
- pose proof (Int.signed_range (Int64.hiword l)).
- change Int.min_signed with (-2147483648) in H4.
- change Int.max_signed with 2147483647 in H4.
- rewrite <- Z2R_mult in H3.
- rewrite round_generic in H3.
- - destruct (Rlt_bool_spec (Rabs (Z2R (Int.signed (Int64.hiword l) * 4294967296)))
- (bpow radix2 1024)).
- + rewrite H0 in H3.
- destruct H3 as [? [? ?]].
- { unfold add, b64_plus.
- pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
- (Bmult 53 1024 eq_refl eq_refl binop_pl mode_NE
- (binary_normalize64 (Int.signed (Int64.hiword l)) 0 false)
- (pow2_float false 32 (conj eq_refl eq_refl)))
- (binary_normalize64 (Int.unsigned (Int64.loword l)) 0 false) H6 H2).
- rewrite H3, H1, <- Z2R_plus in H8.
- change 4294967296 with (two_p 32) in H8.
- rewrite <- Int64.ofwords_add'', Int64.ofwords_recompose in H8.
- destruct (Rlt_bool_spec
- (Rabs
- (round radix2 (FLT_exp (3 - 1024 - 53) 53)
- (round_mode mode_NE) (Z2R (Int64.signed l))))
- (bpow radix2 1024)).
- - unfold floatoflong. unfold binary_normalize64.
- pose proof (binary_normalize64_correct (Int64.signed l) 0 false).
- replace (F2R (beta:=radix2) {| Fnum := Int64.signed l; Fexp := 0 |})
- with (Z2R (Int64.signed l)) in H10
- by (unfold F2R, Fexp, Fnum, bpow; field).
- rewrite Rlt_bool_true in H10 by auto.
- destruct (Int64.eq_dec l Int64.zero). subst. reflexivity.
- destruct H10, H8.
- assert (1 <= round radix2 (FLT_exp (3 - 1024 - 53) 53) (round_mode mode_NE)
- (Z2R (Zabs (Int64.signed l))))%R.
- { erewrite <- round_generic with (x:=1%R).
- apply round_le. apply fexp_correct. reflexivity. apply valid_rnd_round_mode.
- assert (Int64.signed l <> 0).
- { contradict n. rewrite <- (Int64.repr_signed l), n. auto. }
- change 1%R with (Z2R 1). apply Z2R_le.
- zify. omega. apply valid_rnd_round_mode.
- apply (generic_format_bpow _ _ 0). compute. discriminate. }
- rewrite Z2R_abs in H13.
- rewrite round_NE_abs in H13 by (apply fexp_correct; reflexivity).
- change ZnearestE with (round_mode mode_NE) in H13.
- unfold binary_normalize64 in *.
- apply B2R_inj.
- + destruct H11, (binary_normalize 53 1024 eq_refl eq_refl mode_NE (Int64.signed l) 0 false); try discriminate.
- unfold B2R in H10. rewrite <- H10, Rabs_R0 in H13. apply (le_Z2R 1 0) in H13. omega.
- auto.
- + destruct H12; match goal with Hf0:is_finite _ _ ?f0 = true,
- Hf1:B2R _ _ ?f1 = _ |-
- is_finite_strict _ _ ?f = true =>
- change f0 with f in Hf0; change f1 with f in Hf1;
- destruct f
- end; try discriminate.
- unfold B2R in H8. rewrite <- H8, Rabs_R0 in H13. apply (le_Z2R 1 0) in H13. omega.
- auto.
- + rewrite H10. symmetry. apply H8.
- - exfalso.
- eapply Rle_trans with (r3:=round radix2 (FLT_exp (3 - 1024 - 53) 53)
- (round_mode mode_NE) (bpow radix2 64)) in H9.
- rewrite round_generic in H9.
- + eapply le_bpow in H9. omega.
- + apply valid_rnd_round_mode.
- + apply generic_format_bpow. compute. discriminate.
- + rewrite <- round_NE_abs. 2:apply fexp_correct; reflexivity.
- apply round_le. apply fexp_correct; reflexivity. apply valid_rnd_round_mode.
- rewrite <- Z2R_abs. change (bpow radix2 64)%R with (Z2R Int64.modulus).
- apply Z2R_le.
- destruct (Int64.signed_range l).
- assert (-Int64.modulus < Int64.min_signed) by reflexivity.
- assert (Int64.max_signed < Int64.modulus) by reflexivity.
- zify. omega. }
- + exfalso.
- rewrite <- Z2R_abs in H5.
- change (bpow radix2 1024) with (Z2R (radix2 ^ 1024)) in H5.
- apply le_Z2R in H5. assert (radix2 ^ 1024 < 18446744073709551616) by (zify; omega).
- discriminate.
- - apply valid_rnd_round_mode.
- - destruct (Z.eq_dec (Int.signed (Int64.hiword l)) 0).
- rewrite e. apply generic_format_0.
- apply generic_format_FLT_FLX.
- + apply Rle_trans with (bpow radix2 0). apply bpow_le. omega.
- change (bpow radix2 0) with (Z2R 1). rewrite <- Z2R_abs. apply Z2R_le.
- clear - n H4. zify; omega.
- + apply generic_format_FLX.
- eexists {| Fnum := Int.signed (Int64.hiword l); Fexp := 32 |}.
- unfold F2R, Fnum, Fexp. split.
- rewrite Z2R_mult. auto.
- change (radix2 ^ 53) with 9007199254740992.
- clear -n H4. zify; omega.
-Qed.
-
-Theorem floatoflong_from_words:
+ pose proof (Int.unsigned_range (Int64.loword l)).
+ rewrite ! from_words_eq, ! from_words_eq'.
+ set (p20 := Int.unsigned (Int.repr (two_p 20))).
+ set (x := Int64.unsigned l) in *;
+ set (xl := Int.unsigned (Int64.loword l)) in *;
+ set (xh := Int.unsigned (Int64.hiword l)) in *.
+ unfold sub, b64_minus. rewrite BofZ_minus.
+ replace (2^84 + xh * 2^32 - (2^84 + p20 * 2^32))
+ with ((xh - p20) * 2^32) by ring.
+ unfold add, b64_plus. rewrite BofZ_plus.
+ unfold of_longu, b64_of_Z. f_equal.
+ rewrite <- (Int64.ofwords_recompose l) at 1. rewrite Int64.ofwords_add'.
+ fold xh; fold xl. compute_this (two_p 32); compute_this p20; ring.
+ apply integer_representable_n2p; auto.
+ compute_this p20; smart_omega. omega. omega.
+ apply integer_representable_n; auto; smart_omega.
+ replace (2^84 + xh * 2^32) with ((2^52 + xh) * 2^32) by ring.
+ apply integer_representable_n2p; auto. smart_omega. omega. omega.
+ change (2^84 + p20 * 2^32) with ((2^52 + 1048576) * 2^32).
+ apply integer_representable_n2p; auto. omega. omega.
+Qed.
+
+Theorem of_long_from_words:
forall l,
- floatoflong l =
+ of_long l =
add (sub (from_words ox4530_0000 (Int.add (Int64.hiword l) ox8000_0000))
(from_words ox4530_0000 (Int.repr (two_p 20+two_p 31))))
(from_words ox4330_0000 (Int64.loword l)).
Proof.
intros.
- pose proof (Int64.signed_range l);
- compute_this (Int64.min_signed); compute_this (Int64.max_signed).
- pose proof (Int.unsigned_range (Int.add (Int64.hiword l) ox8000_0000)).
- destruct (from_words_value (Int64.loword l)).
- destruct (from_words_value' (Int.add (Int64.hiword l) ox8000_0000)).
- destruct (from_words_value' (Int.repr (two_p 20+two_p 31))).
- unfold sub, b64_minus.
- pose proof (Bminus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
- (from_words ox4530_0000 (Int.add (Int64.hiword l) ox8000_0000))
- (from_words ox4530_0000 (Int.repr (two_p 20+two_p 31))) H4 H6).
- rewrite round_generic in H7.
- - rewrite H3, H5, ox8000_0000_signed_unsigned in H7.
- replace (bpow radix2 84 + Z2R ((Int.signed (Int64.hiword l) + Int.half_modulus) * two_p 32) -
- (bpow radix2 84 + Z2R (Int.unsigned (Int.repr (two_p 20+two_p 31)) * two_p 32)))%R
- with (Z2R (Int.unsigned (Int.add (Int64.hiword l) ox8000_0000) * two_p 32 -two_p 52-two_p 63)) in H7.
- + rewrite Rlt_bool_true in H7.
- * { destruct H7 as [? []].
- unfold floatoflong, binary_normalize64.
- pose proof (binary_normalize64_correct (Int64.signed l) 0 false).
- replace (F2R (beta:=radix2) {| Fnum := Int64.signed l; Fexp := 0 |})
- with (Z2R (Int64.signed l)) in H10
- by (unfold F2R, Fexp, Fnum, bpow; field).
- assert (Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
- (round_mode mode_NE) (Z2R (Int64.signed l))) <
- bpow radix2 1024)%R.
- { rewrite <- round_NE_abs by (apply fexp_correct; reflexivity).
- eapply Rle_lt_trans with (Z2R (two_p 64)). 2:apply Z2R_lt; reflexivity.
- erewrite <- round_generic.
- - apply round_le. apply fexp_correct; reflexivity.
- apply (valid_rnd_round_mode mode_NE).
- rewrite <- Z2R_abs. apply Z2R_le.
- compute_this (two_p 64). zify; omega.
- - apply (valid_rnd_round_mode mode_NE).
- - apply (generic_format_bpow radix2 _ 64). compute. discriminate. }
- rewrite Rlt_bool_true in H10 by auto.
- unfold add, b64_plus.
- pose proof (Bplus_correct 53 1024 eq_refl eq_refl binop_pl mode_NE
- (Bminus 53 1024 eq_refl eq_refl binop_pl mode_NE
- (from_words ox4530_0000 (Int.add (Int64.hiword l) ox8000_0000))
- (from_words ox4530_0000 (Int.repr (two_p 20 + two_p 31))))
- (from_words ox4330_0000 (Int64.loword l)) H8 H2).
- change (bpow radix2 52) with (Z2R (two_p 52)) in H1.
- rewrite H7, H1, <- !Z2R_plus, ox8000_0000_signed_unsigned in H12.
- change (two_p 63) with (Int.half_modulus * two_p 32) in H12.
- replace ((Int.signed (Int64.hiword l) + Int.half_modulus) *
- two_p 32 - two_p 52 - (Int.half_modulus * two_p 32) +
- (two_p 52 + Int.unsigned (Int64.loword l)))
- with (Int.signed (Int64.hiword l) * two_p 32 + Int.unsigned (Int64.loword l))
- in H12 by ring.
- rewrite <- Int64.ofwords_add'', Int64.ofwords_recompose, Rlt_bool_true in H12 by auto.
- destruct (Z.eq_dec (Int64.signed l) 0).
- - apply (f_equal Int64.repr) in e. rewrite Int64.repr_signed in e.
- subst. reflexivity.
- - destruct H12 as [? []], H10 as [? []].
- assert (1 <= Rabs (round radix2 (FLT_exp (3 - 1024 - 53) 53)
- (round_mode mode_NE) (Z2R (Int64.signed l)))) %R.
- { rewrite <- round_NE_abs, <- Z2R_abs by (apply fexp_correct; reflexivity).
- erewrite <- round_generic with (x := 1%R).
- 3:eapply (generic_format_bpow _ _ 0).
- apply round_le, (Z2R_le 1).
- apply fexp_correct; reflexivity. apply (valid_rnd_round_mode mode_NE).
- zify; omega.
- apply (valid_rnd_round_mode mode_NE).
- compute; discriminate. }
- eapply B2R_inj.
- + destruct binary_normalize; try discriminate H15.
- unfold B2R in H10. rewrite <- H10, Rabs_R0 in H17. apply (le_Z2R 1 0) in H17. omega.
- auto.
- + match goal with Hf0:is_finite _ _ ?f0 = true,
- Hf1:B2R _ _ ?f1 = _ |-
- is_finite_strict _ _ ?f = true =>
- change f0 with f in Hf0; change f1 with f in Hf1;
- destruct f
- end; try discriminate H13.
- unfold B2R in H12. rewrite <- H12, Rabs_R0 in H17. apply (le_Z2R 1 0) in H17. omega.
- auto.
- + etransitivity; eauto. }
- * rewrite <- Z2R_abs. apply (Z2R_lt _ (2^1024)).
- compute_this Int.modulus; compute_this (two_p 32);
- compute_this (two_p 52); compute_this (two_p 63); compute_this (2^1024).
- clear - H0. zify; omega.
- + rewrite ox8000_0000_signed_unsigned, !Z2R_minus.
- compute_this (Z2R (Int.unsigned (Int.repr (two_p 20 + two_p 31)) * two_p 32)).
- compute_this (Z2R (two_p 52)). compute_this (Z2R (two_p 63)). ring.
- - apply valid_rnd_round_mode.
- - apply sterbenz.
- + apply FLT_exp_monotone.
- + apply generic_format_B2R.
- + apply generic_format_B2R.
- + rewrite H3, H5, Int.unsigned_repr by (compute; split; discriminate).
- unfold bpow. rewrite <- !Z2R_plus, <- (Z2R_mult 2).
- compute_this (Z.pow_pos radix2 84); compute_this (Z.pow_pos radix2 84);
- compute_this ((two_p 20 + two_p 31) * two_p 32); compute_this (two_p 32);
- compute_this (Int.modulus).
- change (19342813113834066795298816 + 9227875636482146304)
- with (9671411170854851638722560 * 2).
- unfold Rdiv. rewrite Z2R_mult, Rmult_assoc, Rinv_r, Rmult_1_r by (apply (Z2R_neq 2 0); omega).
- split; apply Z2R_le; omega.
+ pose proof (Int64.signed_range l).
+ pose proof (Int.signed_range (Int64.hiword l)).
+ pose proof (Int.unsigned_range (Int64.loword l)).
+ rewrite ! from_words_eq, ! from_words_eq'.
+ set (p := Int.unsigned (Int.repr (two_p 20 + two_p 31))).
+ set (x := Int64.signed l) in *;
+ set (xl := Int.unsigned (Int64.loword l)) in *;
+ set (xh := Int.signed (Int64.hiword l)) in *.
+ rewrite ox8000_0000_signed_unsigned. fold xh.
+ unfold sub, b64_minus. rewrite BofZ_minus.
+ replace (2^84 + (xh + Int.half_modulus) * 2^32 - (2^84 + p * 2^32))
+ with ((xh - 2^20) * 2^32)
+ by (compute_this p; compute_this Int.half_modulus; ring).
+ unfold add, b64_plus. rewrite BofZ_plus.
+ unfold of_long, b64_of_Z. f_equal.
+ rewrite <- (Int64.ofwords_recompose l) at 1. rewrite Int64.ofwords_add''.
+ fold xh; fold xl. compute_this (two_p 32); ring.
+ apply integer_representable_n2p; auto.
+ compute_this (2^20); smart_omega. omega. omega.
+ apply integer_representable_n; auto; smart_omega.
+ replace (2^84 + (xh + Int.half_modulus) * 2^32)
+ with ((2^52 + xh + Int.half_modulus) * 2^32)
+ by (compute_this Int.half_modulus; ring).
+ apply integer_representable_n2p; auto. smart_omega. omega. omega.
+ change (2^84 + p * 2^32) with ((2^52 + p) * 2^32).
+ apply integer_representable_n2p; auto.
+ compute_this p; smart_omega. omega.
Qed.
-Global Opaque
- zero eq_dec neg abs singleoffloat intoffloat intuoffloat floatofint floatofintu
- add sub mul div cmp bits_of_double double_of_bits bits_of_single single_of_bits from_words.
+(** Conversions from 64-bit integers can be expressed in terms of
+ conversions from their 32-bit halves. *)
+
+Theorem of_longu_decomp:
+ forall l,
+ of_longu l = add (mul (of_intu (Int64.hiword l)) (b64_of_Z (2^32)))
+ (of_intu (Int64.loword l)).
+Proof.
+ intros.
+ unfold of_longu, of_intu, b64_of_Z, add, mul, b64_plus, b64_mult.
+ pose proof (Int.unsigned_range (Int64.loword l)).
+ pose proof (Int.unsigned_range (Int64.hiword l)).
+ pose proof (Int64.unsigned_range l).
+ set (x := Int64.unsigned l) in *.
+ set (yl := Int.unsigned (Int64.loword l)) in *.
+ set (yh := Int.unsigned (Int64.hiword l)) in *.
+ assert (DECOMP: x = yh * 2^32 + yl).
+ { unfold x. rewrite <- (Int64.ofwords_recompose l). apply Int64.ofwords_add'. }
+ rewrite BofZ_mult. rewrite BofZ_plus. rewrite DECOMP; auto.
+ apply integer_representable_n2p; auto. smart_omega. omega. omega.
+ apply integer_representable_n; auto; smart_omega.
+ apply integer_representable_n; auto; smart_omega.
+ apply integer_representable_n; auto; smart_omega.
+ compute; auto.
+Qed.
+
+Theorem of_long_decomp:
+ forall l,
+ of_long l = add (mul (of_int (Int64.hiword l)) (b64_of_Z (2^32)))
+ (of_intu (Int64.loword l)).
+Proof.
+ intros.
+ unfold of_long, of_int, of_intu, b64_of_Z, add, mul, b64_plus, b64_mult.
+ pose proof (Int.unsigned_range (Int64.loword l)).
+ pose proof (Int.signed_range (Int64.hiword l)).
+ pose proof (Int64.signed_range l).
+ set (x := Int64.signed l) in *.
+ set (yl := Int.unsigned (Int64.loword l)) in *.
+ set (yh := Int.signed (Int64.hiword l)) in *.
+ assert (DECOMP: x = yh * 2^32 + yl).
+ { unfold x. rewrite <- (Int64.ofwords_recompose l), Int64.ofwords_add''. auto. }
+ rewrite BofZ_mult. rewrite BofZ_plus. rewrite DECOMP; auto.
+ apply integer_representable_n2p; auto. smart_omega. omega. omega.
+ apply integer_representable_n; auto; smart_omega.
+ apply integer_representable_n; auto; smart_omega.
+ apply integer_representable_n; auto. compute; intuition congruence.
+ compute; auto.
+Qed.
+
+(** Conversions from unsigned longs can be expressed in terms of conversions from signed longs.
+ If the unsigned long is too big, a round-to-odd must be performed on it
+ to avoid double rounding. *)
+
+Theorem of_longu_of_long_1:
+ forall x,
+ Int64.ltu x (Int64.repr Int64.half_modulus) = true ->
+ of_longu x = of_long x.
+Proof.
+ unfold of_longu, of_long, Int64.signed, Int64.ltu; intro.
+ change (Int64.unsigned (Int64.repr Int64.half_modulus)) with Int64.half_modulus.
+ destruct (zlt (Int64.unsigned x) Int64.half_modulus); now intuition.
+Qed.
+
+Theorem of_longu_of_long_2:
+ forall x,
+ Int64.ltu x (Int64.repr Int64.half_modulus) = false ->
+ of_longu x = mul (of_long (Int64.or (Int64.shru x Int64.one)
+ (Int64.and x Int64.one)))
+ (of_int (Int.repr 2)).
+Proof.
+ intros. change (of_int (Int.repr 2)) with (BofZ 53 1024 __ __ (2^1)).
+ pose proof (Int64.unsigned_range x).
+ unfold Int64.ltu in H.
+ change (Int64.unsigned (Int64.repr Int64.half_modulus)) with (2^63) in H.
+ destruct (zlt (Int64.unsigned x) (2^63)); inv H.
+ assert (Int64.modulus <= 2^1024 - 2^(1024-53)) by (vm_compute; intuition congruence).
+ set (n := Int64.or (Int64.shru x Int64.one) (Int64.and x Int64.one)).
+ assert (NB: forall i, 0 <= i < 64 ->
+ Int64.testbit n i =
+ if zeq i 0 then Int64.testbit x 1 || Int64.testbit x 0
+ else if zeq i 63 then false else Int64.testbit x (i + 1)).
+ { intros; unfold n; autorewrite with ints; auto. rewrite Int64.unsigned_one.
+ rewrite Int64.bits_one. compute_this Int64.zwordsize.
+ destruct (zeq i 0); simpl proj_sumbool.
+ rewrite zlt_true by omega. rewrite andb_true_r. subst i; auto.
+ rewrite andb_false_r, orb_false_r.
+ destruct (zeq i 63). subst i. apply zlt_false; omega.
+ apply zlt_true; omega. }
+ assert (NB2: forall i, 0 <= i ->
+ Z.testbit (Int64.signed n * 2^1) i =
+ if zeq i 0 then false else
+ if zeq i 1 then Int64.testbit x 1 || Int64.testbit x 0 else
+ Int64.testbit x i).
+ { intros. rewrite Z.mul_pow2_bits by omega. destruct (zeq i 0).
+ apply Z.testbit_neg_r; omega.
+ rewrite Int64.bits_signed by omega. compute_this Int64.zwordsize.
+ destruct (zlt (i-1) 64).
+ rewrite NB by omega. destruct (zeq i 1).
+ subst. rewrite dec_eq_true by auto. auto.
+ rewrite dec_eq_false by omega. destruct (zeq (i - 1) 63).
+ symmetry. apply Int64.bits_above. compute_this Int64.zwordsize; omega.
+ f_equal; omega.
+ rewrite NB by omega. rewrite dec_eq_false by omega. rewrite dec_eq_true by auto.
+ rewrite dec_eq_false by omega. symmetry. apply Int64.bits_above. compute_this Int64.zwordsize; omega.
+ }
+ assert (EQ: Int64.signed n * 2 = int_round_odd (Int64.unsigned x) 1).
+ {
+ symmetry. apply (int_round_odd_bits 53 1024). omega.
+ intros. rewrite NB2 by omega. replace i with 0 by omega. auto.
+ rewrite NB2 by omega. rewrite dec_eq_false by omega. rewrite dec_eq_true.
+ rewrite orb_comm. unfold Int64.testbit. change (2^1) with 2.
+ destruct (Z.testbit (Int64.unsigned x) 0) eqn:B0;
+ [rewrite Z.testbit_true in B0 by omega|rewrite Z.testbit_false in B0 by omega];
+ change (2^0) with 1 in B0; rewrite Zdiv_1_r in B0; rewrite B0; auto.
+ intros. rewrite NB2 by omega. rewrite ! dec_eq_false by omega. auto.
+ }
+ unfold mul, of_long, of_longu, b64_mult, b64_of_Z.
+ rewrite BofZ_mult_2p.
+- change (2^1) with 2. rewrite EQ. apply BofZ_round_odd with (p := 1).
++ omega.
++ apply Zle_trans with Int64.modulus; trivial. smart_omega.
++ omega.
++ apply Zle_trans with (2^63). compute; intuition congruence. xomega.
+- apply Zle_trans with Int64.modulus; trivial.
+ pose proof (Int64.signed_range n).
+ compute_this Int64.min_signed; compute_this Int64.max_signed;
+ compute_this Int64.modulus; xomega.
+- assert (2^63 <= int_round_odd (Int64.unsigned x) 1).
+ { change (2^63) with (int_round_odd (2^63) 1). apply (int_round_odd_le 0 0); omega. }
+ rewrite <- EQ in H1. compute_this (2^63). compute_this (2^53). xomega.
+- omega.
+Qed.
End Float.
+
+(** * Single-precision FP numbers *)
+
+Module Float32.
+
+(** ** NaN payload manipulations *)
+
+Program Definition transform_quiet_pl (pl:nan_pl 24) : nan_pl 24 :=
+ Pos.lor pl (nat_iter 22 xO xH).
+Next Obligation.
+ destruct pl.
+ simpl. rewrite Z.ltb_lt in *.
+ assert (forall x, S (Fcore_digits.digits2_Pnat x) = Pos.to_nat (Pos.size x)).
+ { induction x0; simpl; auto; rewrite IHx0; zify; omega. }
+ fold (Z.of_nat (S (Fcore_digits.digits2_Pnat (Pos.lor x 4194304)))).
+ rewrite H, positive_nat_Z, Psize_log_inf, <- Zlog2_log_inf in *. clear H.
+ change (Z.pos (Pos.lor x 4194304)) with (Z.lor (Z.pos x) 4194304).
+ rewrite Z.log2_lor by (zify; omega).
+ apply Z.max_case. auto. simpl. omega.
+Qed.
+
+Lemma transform_quiet_pl_idempotent:
+ forall pl, transform_quiet_pl (transform_quiet_pl pl) = transform_quiet_pl pl.
+Proof.
+ intros []; simpl; intros. apply Float.nan_payload_fequal.
+ simpl. apply Float.lor_idempotent.
+Qed.
+
+Definition neg_pl (s:bool) (pl:nan_pl 24) := (negb s, pl).
+Definition abs_pl (s:bool) (pl:nan_pl 24) := (false, pl).
+
+Definition binop_pl (x y: binary32) : bool*nan_pl 24 :=
+ match x, y with
+ | B754_nan s1 pl1, B754_nan s2 pl2 =>
+ if Archi.choose_binop_pl_32 s1 pl1 s2 pl2
+ then (s2, transform_quiet_pl pl2)
+ else (s1, transform_quiet_pl pl1)
+ | B754_nan s1 pl1, _ => (s1, transform_quiet_pl pl1)
+ | _, B754_nan s2 pl2 => (s2, transform_quiet_pl pl2)
+ | _, _ => Archi.default_pl_32
+ end.
+
+(** ** Operations over single-precision floats *)
+
+Definition zero: float32 := B754_zero _ _ false. (**r the float [+0.0] *)
+
+Definition eq_dec: forall (f1 f2: float32), {f1 = f2} + {f1 <> f2} := b32_eq_dec.
+
+(** Arithmetic operations *)
+
+Definition neg: float32 -> float32 := b32_opp neg_pl. (**r opposite (change sign) *)
+Definition abs: float32 -> float32 := b32_abs abs_pl. (**r absolute value (set sign to [+]) *)
+Definition add: float32 -> float32 -> float32 := b32_plus binop_pl mode_NE. (**r addition *)
+Definition sub: float32 -> float32 -> float32 := b32_minus binop_pl mode_NE. (**r subtraction *)
+Definition mul: float32 -> float32 -> float32 := b32_mult binop_pl mode_NE. (**r multiplication *)
+Definition div: float32 -> float32 -> float32 := b32_div binop_pl mode_NE. (**r division *)
+Definition cmp (c:comparison) (f1 f2: float32) : bool := (**r comparison *)
+ cmp_of_comparison c (b32_compare f1 f2).
+
+(** Conversions *)
+
+Definition of_double : float -> float32 := Float.to_single.
+Definition to_double : float32 -> float := Float.of_single.
+
+Definition to_int (f:float32): option int := (**r conversion to signed 32-bit int *)
+ option_map Int.repr (b32_to_Z_range f Int.min_signed Int.max_signed).
+Definition to_intu (f:float32): option int := (**r conversion to unsigned 32-bit int *)
+ option_map Int.repr (b32_to_Z_range f 0 Int.max_unsigned).
+Definition to_long (f:float32): option int64 := (**r conversion to signed 64-bit int *)
+ option_map Int64.repr (b32_to_Z_range f Int64.min_signed Int64.max_signed).
+Definition to_longu (f:float32): option int64 := (**r conversion to unsigned 64-bit int *)
+ option_map Int64.repr (b32_to_Z_range f 0 Int64.max_unsigned).
+
+Definition of_int (n:int): float32 := (**r conversion from signed 32-bit int to single-precision float *)
+ b32_of_Z (Int.signed n).
+Definition of_intu (n:int): float32 := (**r conversion from unsigned 32-bit int to single-precision float *)
+ b32_of_Z (Int.unsigned n).
+
+Definition of_long (n:int64): float32 := (**r conversion from signed 64-bit int to single-precision float *)
+ b32_of_Z (Int64.signed n).
+Definition of_longu (n:int64): float32 := (**r conversion from unsigned 64-bit int to single-precision float *)
+ b32_of_Z (Int64.unsigned n).
+
+Definition from_parsed (base:positive) (intPart:positive) (expPart:Z) : float32 :=
+ build_from_parsed 24 128 __ __ base intPart expPart.
+
+(** Conversions between floats and their concrete in-memory representation
+ as a sequence of 32 bits. *)
+
+Definition to_bits (f: float32) : int := Int.repr (bits_of_b32 f).
+Definition of_bits (b: int): float32 := b32_of_bits (Int.unsigned b).
+
+(** ** Properties *)
+
+(** Commutativity properties of addition and multiplication. *)
+
+Theorem add_commut:
+ forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> add x y = add y x.
+Proof.
+ intros. apply Bplus_commut.
+ destruct x, y; try reflexivity. simpl in H. intuition congruence.
+Qed.
+
+Theorem mul_commut:
+ forall x y, is_nan _ _ x = false \/ is_nan _ _ y = false -> mul x y = mul y x.
+Proof.
+ intros. apply Bmult_commut.
+ destruct x, y; try reflexivity. simpl in H. intuition congruence.
+Qed.
+
+(** Multiplication by 2 is diagonal addition. *)
+
+Theorem mul2_add:
+ forall f, add f f = mul f (of_int (Int.repr 2%Z)).
+Proof.
+ intros. apply Bmult2_Bplus.
+ intros. destruct x; try discriminate. simpl.
+ transitivity (b, transform_quiet_pl n).
+ destruct Archi.choose_binop_pl_32; auto.
+ destruct y; auto || discriminate.
+Qed.
+
+(** Divisions that can be turned into multiplication by an inverse. *)
+
+Definition exact_inverse : float32 -> option float32 := b32_exact_inverse.
+
+Theorem div_mul_inverse:
+ forall x y z, exact_inverse y = Some z -> div x y = mul x z.
+Proof.
+ intros. apply Bdiv_mult_inverse; auto.
+ intros. destruct x0; try discriminate. simpl.
+ transitivity (b, transform_quiet_pl n).
+ destruct y0; reflexivity || discriminate.
+ destruct z0; reflexivity || discriminate.
+Qed.
+
+(** Properties of comparisons. *)
+
+Theorem cmp_swap:
+ forall c x y, cmp (swap_comparison c) x y = cmp c y x.
+Proof.
+ unfold cmp, b32_compare; intros. rewrite (Bcompare_swap _ _ x y).
+ apply cmp_of_comparison_swap.
+Qed.
+
+Theorem cmp_ne_eq:
+ forall f1 f2, cmp Cne f1 f2 = negb (cmp Ceq f1 f2).
+Proof.
+ intros; apply cmp_of_comparison_ne_eq.
+Qed.
+
+Theorem cmp_lt_eq_false:
+ forall f1 f2, cmp Clt f1 f2 = true -> cmp Ceq f1 f2 = true -> False.
+Proof.
+ intros f1 f2; apply cmp_of_comparison_lt_eq_false.
+Qed.
+
+Theorem cmp_le_lt_eq:
+ forall f1 f2, cmp Cle f1 f2 = cmp Clt f1 f2 || cmp Ceq f1 f2.
+Proof.
+ intros f1 f2; apply cmp_of_comparison_le_lt_eq.
+Qed.
+
+Theorem cmp_gt_eq_false:
+ forall x y, cmp Cgt x y = true -> cmp Ceq x y = true -> False.
+Proof.
+ intros f1 f2; apply cmp_of_comparison_gt_eq_false.
+Qed.
+
+Theorem cmp_ge_gt_eq:
+ forall f1 f2, cmp Cge f1 f2 = cmp Cgt f1 f2 || cmp Ceq f1 f2.
+Proof.
+ intros f1 f2; apply cmp_of_comparison_ge_gt_eq.
+Qed.
+
+Theorem cmp_lt_gt_false:
+ forall f1 f2, cmp Clt f1 f2 = true -> cmp Cgt f1 f2 = true -> False.
+Proof.
+ intros f1 f2; apply cmp_of_comparison_lt_gt_false.
+Qed.
+
+Theorem cmp_double:
+ forall f1 f2 c, cmp c f1 f2 = Float.cmp c (to_double f1) (to_double f2).
+Proof.
+ unfold cmp, Float.cmp; intros. f_equal. symmetry. apply Bcompare_Bconv_widen.
+ red; omega. omega. omega.
+Qed.
+
+(** Properties of conversions to/from in-memory representation.
+ The conversions are bijective (one-to-one). *)
+
+Theorem of_to_bits:
+ forall f, of_bits (to_bits f) = f.
+Proof.
+ intros; unfold of_bits, to_bits, bits_of_b32, b32_of_bits.
+ rewrite Int.unsigned_repr, binary_float_of_bits_of_binary_float; [reflexivity|].
+ generalize (bits_of_binary_float_range 23 8 __ __ f).
+ change (2^(23+8+1)) with (Int.max_unsigned + 1). omega.
+Qed.
+
+Theorem to_of_bits:
+ forall b, to_bits (of_bits b) = b.
+Proof.
+ intros; unfold of_bits, to_bits, bits_of_b32, b32_of_bits.
+ rewrite bits_of_binary_float_of_bits. apply Int.repr_unsigned.
+ apply Int.unsigned_range.
+Qed.
+
+(** Conversions from 32-bit integers to single-precision floats can
+ be decomposed into a conversion to a double-precision float,
+ followed by a [Float32.of_double] conversion. No double rounding occurs. *)
+
+Theorem of_int_double:
+ forall n, of_int n = of_double (Float.of_int n).
+Proof.
+ intros. symmetry. apply Bconv_BofZ.
+ apply integer_representable_n; auto. generalize (Int.signed_range n); Float.smart_omega.
+Qed.
+
+Theorem of_intu_double:
+ forall n, of_intu n = of_double (Float.of_intu n).
+Proof.
+ intros. symmetry. apply Bconv_BofZ.
+ apply integer_representable_n; auto. generalize (Int.unsigned_range n); Float.smart_omega.
+Qed.
+
+(** Conversion of single-precision floats to integers can be decomposed
+ into a [Float32.to_double] extension, followed by a double-precision-to-int
+ conversion. *)
+
+Theorem to_int_double:
+ forall f n, to_int f = Some n -> Float.to_int (to_double f) = Some n.
+Proof.
+ intros.
+ unfold to_int in H.
+ destruct (b32_to_Z_range f Int.min_signed Int.max_signed) as [n'|] eqn:E; inv H.
+ unfold Float.to_int, to_double, Float.of_single, b64_to_Z_range, b64_of_b32.
+ erewrite ZofB_range_Bconv; eauto. auto. omega. omega. omega. omega.
+Qed.
+
+Theorem to_intu_double:
+ forall f n, to_intu f = Some n -> Float.to_intu (to_double f) = Some n.
+Proof.
+ intros.
+ unfold to_intu in H.
+ destruct (b32_to_Z_range f 0 Int.max_unsigned) as [n'|] eqn:E; inv H.
+ unfold Float.to_intu, to_double, Float.of_single, b64_to_Z_range, b64_of_b32.
+ erewrite ZofB_range_Bconv; eauto. auto. omega. omega. omega. omega.
+Qed.
+
+Theorem to_long_double:
+ forall f n, to_long f = Some n -> Float.to_long (to_double f) = Some n.
+Proof.
+ intros.
+ unfold to_long in H.
+ destruct (b32_to_Z_range f Int64.min_signed Int64.max_signed) as [n'|] eqn:E; inv H.
+ unfold Float.to_long, to_double, Float.of_single, b64_to_Z_range, b64_of_b32.
+ erewrite ZofB_range_Bconv; eauto. auto. omega. omega. omega. omega.
+Qed.
+
+Theorem to_longu_double:
+ forall f n, to_longu f = Some n -> Float.to_longu (to_double f) = Some n.
+Proof.
+ intros.
+ unfold to_longu in H.
+ destruct (b32_to_Z_range f 0 Int64.max_unsigned) as [n'|] eqn:E; inv H.
+ unfold Float.to_longu, to_double, Float.of_single, b64_to_Z_range, b64_of_b32.
+ erewrite ZofB_range_Bconv; eauto. auto. omega. omega. omega. omega.
+Qed.
+
+(** Conversions from 64-bit integers to single-precision floats can be expressed
+ as conversion to a double-precision float followed by a [Float32.of_double] conversion.
+ To avoid double rounding when the integer is large (above [2^53]), a round
+ to odd must be performed on the integer before conversion to double-precision float. *)
+
+Lemma int_round_odd_plus:
+ forall p n, 0 <= p ->
+ int_round_odd n p = Z.land (Z.lor n (Z.land n (2^p-1) + (2^p-1))) (-(2^p)).
+Proof.
+ intros.
+ assert (POS: 0 < 2^p) by (apply (Zpower_gt_0 radix2); auto).
+ assert (A: Z.land n (2^p-1) = n mod 2^p).
+ { rewrite <- Z.land_ones by auto. f_equal. rewrite Z.ones_equiv. omega. }
+ rewrite A.
+ assert (B: 0 <= n mod 2^p < 2^p).
+ { apply Z_mod_lt. omega. }
+ set (m := n mod 2^p + (2^p-1)) in *.
+ assert (C: m / 2^p = if zeq (n mod 2^p) 0 then 0 else 1).
+ { unfold m. destruct (zeq (n mod 2^p) 0).
+ rewrite e. apply Zdiv_small. omega.
+ eapply Zdiv_unique with (n mod 2^p - 1). ring. omega. }
+ assert (D: Z.testbit m p = if zeq (n mod 2^p) 0 then false else true).
+ { destruct (zeq (n mod 2^p) 0).
+ apply Z.testbit_false; auto. rewrite C; auto.
+ apply Z.testbit_true; auto. rewrite C; auto. }
+ assert (E: forall i, p < i -> Z.testbit m i = false).
+ { intros. apply Z.testbit_false. omega.
+ replace (m / 2^i) with 0. auto. symmetry. apply Zdiv_small.
+ unfold m. split. omega. apply Zlt_le_trans with (2 * 2^p). omega.
+ change 2 with (2^1) at 1. rewrite <- (Zpower_plus radix2) by omega.
+ apply Zpower_le. omega. }
+ assert (F: forall i, 0 <= i -> Z.testbit (-2^p) i = if zlt i p then false else true).
+ { intros. rewrite Z.bits_opp by auto. rewrite <- Z.ones_equiv.
+ destruct (zlt i p).
+ rewrite Z.ones_spec_low by omega. auto.
+ rewrite Z.ones_spec_high by omega. auto. }
+ apply int_round_odd_bits; auto.
+ - intros. rewrite Z.land_spec, F, zlt_true by omega. apply andb_false_r.
+ - rewrite Z.land_spec, Z.lor_spec, D, F, zlt_false, andb_true_r by omega.
+ destruct (Z.eqb (n mod 2^p) 0) eqn:Z.
+ rewrite Z.eqb_eq in Z. rewrite Z, zeq_true. apply orb_false_r.
+ rewrite Z.eqb_neq in Z. rewrite zeq_false by auto. apply orb_true_r.
+ - intros. rewrite Z.land_spec, Z.lor_spec, E, F, zlt_false, andb_true_r by omega.
+ apply orb_false_r.
+Qed.
+
+Lemma of_long_round_odd:
+ forall n conv_nan,
+ 2^36 <= Z.abs n < 2^64 ->
+ b32_of_Z n = b32_of_b64 conv_nan mode_NE (b64_of_Z (Z.land (Z.lor n ((Z.land n 2047) + 2047)) (-2048))).
+Proof.
+ intros. rewrite <- (int_round_odd_plus 11) by omega.
+ assert (-2^64 <= int_round_odd n 11).
+ { change (-2^64) with (int_round_odd (-2^64) 11). apply (int_round_odd_le 0 0); xomega. }
+ assert (int_round_odd n 11 <= 2^64).
+ { change (2^64) with (int_round_odd (2^64) 11). apply (int_round_odd_le 0 0); xomega. }
+ unfold b32_of_Z, b32_of_b64, b64_of_Z.
+ rewrite Bconv_BofZ.
+ apply BofZ_round_odd with (p := 11).
+ omega.
+ apply Zle_trans with (2^64). omega. compute; intuition congruence.
+ omega.
+ exact (proj1 H).
+ unfold int_round_odd. apply integer_representable_n2p_wide. auto. omega.
+ unfold int_round_odd in H0, H1.
+ split; (apply Zmult_le_reg_r with (2^11); [compute; auto | assumption]).
+ omega.
+ omega.
+Qed.
+
+Theorem of_longu_double_1:
+ forall n,
+ Int64.unsigned n <= 2^53 ->
+ of_longu n = of_double (Float.of_longu n).
+Proof.
+ intros. symmetry; apply Bconv_BofZ. apply integer_representable_n; auto.
+ pose proof (Int64.unsigned_range n); omega.
+Qed.
+
+Theorem of_longu_double_2:
+ forall n,
+ 2^36 <= Int64.unsigned n ->
+ of_longu n = of_double (Float.of_longu
+ (Int64.and (Int64.or n
+ (Int64.add (Int64.and n (Int64.repr 2047))
+ (Int64.repr 2047)))
+ (Int64.repr (-2048)))).
+Proof.
+ intros.
+ pose proof (Int64.unsigned_range n).
+ unfold of_longu. erewrite of_long_round_odd.
+ unfold of_double, Float.to_single. instantiate (1 := Float.to_single_pl).
+ f_equal. unfold Float.of_longu. f_equal.
+ set (n' := Z.land (Z.lor (Int64.unsigned n) (Z.land (Int64.unsigned n) 2047 + 2047)) (-2048)).
+ assert (int_round_odd (Int64.unsigned n) 11 = n') by (apply int_round_odd_plus; omega).
+ assert (0 <= n').
+ { rewrite <- H1. change 0 with (int_round_odd 0 11). apply (int_round_odd_le 0 0); omega. }
+ assert (n' < Int64.modulus).
+ { apply Zle_lt_trans with (int_round_odd (Int64.modulus - 1) 11).
+ rewrite <- H1. apply (int_round_odd_le 0 0); omega.
+ compute; auto. }
+ rewrite <- (Int64.unsigned_repr n') by (unfold Int64.max_unsigned; omega).
+ f_equal. Int64.bit_solve. rewrite Int64.testbit_repr by auto. unfold n'.
+ rewrite Z.land_spec, Z.lor_spec. f_equal. f_equal.
+ unfold Int64.testbit. rewrite Int64.add_unsigned.
+ fold (Int64.testbit (Int64.repr
+ (Int64.unsigned (Int64.and n (Int64.repr 2047)) +
+ Int64.unsigned (Int64.repr 2047))) i).
+ rewrite Int64.testbit_repr by auto. f_equal. f_equal. unfold Int64.and.
+ symmetry. apply Int64.unsigned_repr. change 2047 with (Z.ones 11).
+ rewrite Z.land_ones by omega.
+ exploit (Z_mod_lt (Int64.unsigned n) (2^11)). compute; auto.
+ assert (2^11 < Int64.max_unsigned) by (compute; auto). omega.
+ apply Int64.same_bits_eqm; auto. exists (-1); auto.
+ split. xomega. change (2^64) with Int64.modulus. xomega.
+Qed.
+
+Theorem of_long_double_1:
+ forall n,
+ Z.abs (Int64.signed n) <= 2^53 ->
+ of_long n = of_double (Float.of_long n).
+Proof.
+ intros. symmetry; apply Bconv_BofZ. apply integer_representable_n; auto. xomega.
+Qed.
+
+Theorem of_long_double_2:
+ forall n,
+ 2^36 <= Z.abs (Int64.signed n) ->
+ of_long n = of_double (Float.of_long
+ (Int64.and (Int64.or n
+ (Int64.add (Int64.and n (Int64.repr 2047))
+ (Int64.repr 2047)))
+ (Int64.repr (-2048)))).
+Proof.
+ intros.
+ pose proof (Int64.signed_range n).
+ unfold of_long. erewrite of_long_round_odd.
+ unfold of_double, Float.to_single. instantiate (1 := Float.to_single_pl).
+ f_equal. unfold Float.of_long. f_equal.
+ set (n' := Z.land (Z.lor (Int64.signed n) (Z.land (Int64.signed n) 2047 + 2047)) (-2048)).
+ assert (int_round_odd (Int64.signed n) 11 = n') by (apply int_round_odd_plus; omega).
+ assert (Int64.min_signed <= n').
+ { rewrite <- H1. change Int64.min_signed with (int_round_odd Int64.min_signed 11). apply (int_round_odd_le 0 0); omega. }
+ assert (n' <= Int64.max_signed).
+ { apply Zle_trans with (int_round_odd Int64.max_signed 11).
+ rewrite <- H1. apply (int_round_odd_le 0 0); omega.
+ compute; intuition congruence. }
+ rewrite <- (Int64.signed_repr n') by omega.
+ f_equal. Int64.bit_solve. rewrite Int64.testbit_repr by auto. unfold n'.
+ rewrite Z.land_spec, Z.lor_spec. f_equal. f_equal.
+ rewrite Int64.bits_signed by omega. rewrite zlt_true by omega. auto.
+ unfold Int64.testbit. rewrite Int64.add_unsigned.
+ fold (Int64.testbit (Int64.repr
+ (Int64.unsigned (Int64.and n (Int64.repr 2047)) +
+ Int64.unsigned (Int64.repr 2047))) i).
+ rewrite Int64.testbit_repr by auto. f_equal. f_equal. unfold Int64.and.
+ change (Int64.unsigned (Int64.repr 2047)) with 2047.
+ change 2047 with (Z.ones 11). rewrite ! Z.land_ones by omega.
+ rewrite Int64.unsigned_repr. apply Int64.eqmod_mod_eq.
+ apply Zlt_gt. apply (Zpower_gt_0 radix2); omega.
+ apply Int64.eqmod_divides with (2^64). apply Int64.eqm_signed_unsigned.
+ exists (2^(64-11)); auto.
+ exploit (Z_mod_lt (Int64.unsigned n) (2^11)). compute; auto.
+ assert (2^11 < Int64.max_unsigned) by (compute; auto). omega.
+ apply Int64.same_bits_eqm; auto. exists (-1); auto.
+ split. auto. assert (-2^64 < Int64.min_signed) by (compute; auto).
+ assert (Int64.max_signed < 2^64) by (compute; auto).
+ xomega.
+Qed.
+
+End Float32.
+
+Global Opaque
+ Float.zero Float.eq_dec Float.neg Float.abs Float.of_single Float.to_single
+ Float.of_int Float.of_intu Float.of_long Float.of_longu
+ Float.to_int Float.to_intu Float.to_long Float.to_longu
+ Float.add Float.sub Float.mul Float.div Float.cmp
+ Float.to_bits Float.of_bits Float.from_words.
+
+Global Opaque
+ Float32.zero Float32.eq_dec Float32.neg Float32.abs
+ Float32.of_int Float32.of_intu Float32.of_long Float32.of_longu
+ Float32.to_int Float32.to_intu Float32.to_long Float32.to_longu
+ Float32.add Float32.sub Float32.mul Float32.div Float32.cmp
+ Float32.to_bits Float32.of_bits.
+