From 41b7ecb127b93b1aecc29a298ec21dc94603e6fa Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Mon, 29 Jul 2013 12:10:11 +0000
Subject: Optimize integer divisions by positive constants, turning them into
 multiply-high and shifts.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2300 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 arm/Asm.v           |  28 ++++++++++----
 arm/Asmgen.v        |  18 +++++++--
 arm/Asmgenproof1.v  |   7 ++--
 arm/ConstpropOp.vp  |   3 ++
 arm/Op.v            |  10 +++++
 arm/PrintAsm.ml     |  10 +++--
 arm/SelectOp.vp     |  41 +++++++-------------
 arm/SelectOpproof.v | 108 +++++++++++++++++++++++++++++++---------------------
 8 files changed, 135 insertions(+), 90 deletions(-)

(limited to 'arm')

diff --git a/arm/Asm.v b/arm/Asm.v
index 43a1435..76e8196 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -157,9 +157,11 @@ Inductive instruction : Type :=
   | Pstr: ireg -> ireg -> shift_addr -> instruction (**r int32 store *)
   | Pstrb: ireg -> ireg -> shift_addr -> instruction (**r int8 store *)
   | Pstrh: ireg -> ireg -> shift_addr -> instruction (**r int16 store *)
-  | Psdiv: ireg -> ireg -> ireg -> instruction      (**r signed division *)
+  | Psdiv: instruction                              (**r signed division *)
+  | Psmull: ireg -> ireg -> ireg -> ireg -> instruction (**r signed multiply long *)
   | Psub: ireg -> ireg -> shift_op -> instruction  (**r integer subtraction *)
-  | Pudiv: ireg -> ireg -> ireg -> instruction      (**r unsigned division *)
+  | Pudiv: instruction                             (**r unsigned division *)
+  | Pumull: ireg -> ireg -> ireg -> ireg -> instruction (**r unsigned multiply long *)
   (* Floating-point coprocessor instructions (VFP double scalar operations) *)
   | Pfcpyd: freg -> freg -> instruction             (**r float move *)
   | Pfabsd: freg -> freg -> instruction             (**r float absolute value *)
@@ -559,18 +561,28 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       exec_store Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
   | Pstrh r1 r2 sa => 
       exec_store Mint16unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
-  | Psdiv rd r1 r2 =>
-      match Val.divs rs#r1 rs#r2 with
-      | Some v => Next (nextinstr (rs#rd <- v)) m
+  | Psdiv =>
+      match Val.divs rs#IR0 rs#IR1 with
+      | Some v => Next (nextinstr (rs#IR0 <- v
+                                     #IR1 <- Vundef #IR2 <- Vundef
+                                     #IR3 <- Vundef #IR12 <- Vundef)) m
       | None => Stuck
       end
+  | Psmull rdl rdh r1 r2 =>
+      Next (nextinstr (rs#rdl <- (Val.mul rs#r1 rs#r2)
+                         #rdh <- (Val.mulhs rs#r1 rs#r2))) m
   | Psub r1 r2 so =>  
       Next (nextinstr (rs#r1 <- (Val.sub rs#r2 (eval_shift_op so rs)))) m
-  | Pudiv rd r1 r2 =>
-      match Val.divu rs#r1 rs#r2 with
-      | Some v => Next (nextinstr (rs#rd <- v)) m
+  | Pudiv =>
+      match Val.divu rs#IR0 rs#IR1 with
+      | Some v => Next (nextinstr (rs#IR0 <- v
+                                     #IR1 <- Vundef #IR2 <- Vundef
+                                     #IR3 <- Vundef #IR12 <- Vundef)) m
       | None => Stuck
       end
+  | Pumull rdl rdh r1 r2 =>
+      Next (nextinstr (rs#rdl <- (Val.mul rs#r1 rs#r2)
+                         #rdh <- (Val.mulhu rs#r1 rs#r2))) m
   (* Floating-point coprocessor instructions *)
   | Pfcpyd r1 r2 =>
       Next (nextinstr (rs#r1 <- (rs#r2))) m
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index 6645149..3707b7f 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -299,12 +299,22 @@ Definition transl_op
       OK (if negb (ireg_eq r r1) then Pmla r r1 r2 r3 :: k
           else if negb (ireg_eq r r2) then Pmla r r2 r1 r3 :: k
           else Pmla IR14 r1 r2 r3 :: Pmov r (SOreg IR14) :: k)
-  | Odiv, a1 :: a2 :: nil =>
+  | Omulhs, a1 :: a2 :: nil =>
       do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Psdiv r r1 r2 :: k)
-  | Odivu, a1 :: a2 :: nil =>
+      OK (Psmull IR14 r r1 r2 :: k)
+  | Omulhu, a1 :: a2 :: nil =>
       do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
-      OK (Pudiv r r1 r2 :: k)
+      OK (Pumull IR14 r r1 r2 :: k)
+  | Odiv, a1 :: a2 :: nil =>
+      assertion (mreg_eq res R0);
+      assertion (mreg_eq a1 R0);
+      assertion (mreg_eq a2 R1);
+      OK (Psdiv :: k)
+  | Odivu, a1 :: a2 :: nil =>
+      assertion (mreg_eq res R0);
+      assertion (mreg_eq a1 R0);
+      assertion (mreg_eq a2 R1);
+      OK (Pudiv :: k)
   | Oand, a1 :: a2 :: nil =>
       do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
       OK (Pand r r1 (SOreg r2) :: k)
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index d77006a..21d2b73 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -942,11 +942,12 @@ Proof.
   eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
   intuition Simpl.
   (* divs *)
-  econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto. 
-  intuition Simpl.
+Local Transparent destroyed_by_op.
+  econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto.
+  split. Simpl. simpl; intros. intuition Simpl. 
   (* divu *)
   econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto.
-  intuition Simpl.
+  split. Simpl. simpl; intros. intuition Simpl. 
   (* Oandimm *)
   generalize (andimm_correct x x0 i k rs m). 
   intros [rs' [A [B C]]]. 
diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp
index 9bf066b..caf0da6 100644
--- a/arm/ConstpropOp.vp
+++ b/arm/ConstpropOp.vp
@@ -112,6 +112,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) :=
   | Orsubshift s, I n1 :: I n2 :: nil => I(Int.sub (eval_static_shift s n2) n1)
   | Orsubimm n, I n1 :: nil => I (Int.sub n n1)
   | Omul, I n1 :: I n2 :: nil => I(Int.mul n1 n2)
+  | Omla, I n1 :: I n2 :: I n3 :: nil => I(Int.add (Int.mul n1 n2) n3)
+  | Omulhs, I n1 :: I n2 :: nil => I(Int.mulhs n1 n2)
+  | Omulhu, I n1 :: I n2 :: nil => I(Int.mulhu n1 n2)
   | Odiv, I n1 :: I n2 :: nil =>
       if Int.eq n2 Int.zero then Unknown else
       if Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone then Unknown
diff --git a/arm/Op.v b/arm/Op.v
index af3ccdb..fe97d36 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -82,6 +82,8 @@ Inductive operation : Type :=
   | Orsubimm: int -> operation          (**r [rd = n - r1] *)
   | Omul: operation                     (**r [rd = r1 * r2] *)
   | Omla: operation                     (**r [rd = r1 * r2 + r3] *)
+  | Omulhs: operation                   (**r [rd = high part of r1 * r2, signed] *)
+  | Omulhu: operation                   (**r [rd = high part of r1 * r2, unsigned] *)
   | Odiv: operation                     (**r [rd = r1 / r2] (signed) *)
   | Odivu: operation                    (**r [rd = r1 / r2] (unsigned) *)
   | Oand: operation                     (**r [rd = r1 & r2] *)
@@ -226,6 +228,8 @@ Definition eval_operation
   | Orsubimm n, v1 :: nil => Some (Val.sub (Vint n) v1)
   | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2)
   | Omla, v1 :: v2 :: v3 :: nil => Some (Val.add (Val.mul v1 v2) v3)
+  | Omulhs, v1::v2::nil => Some (Val.mulhs v1 v2)
+  | Omulhu, v1::v2::nil => Some (Val.mulhu v1 v2)
   | Odiv, v1 :: v2 :: nil => Val.divs v1 v2
   | Odivu, v1 :: v2 :: nil => Val.divu v1 v2
   | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2)
@@ -319,6 +323,8 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Orsubimm _ => (Tint :: nil, Tint)
   | Omul => (Tint :: Tint :: nil, Tint)
   | Omla => (Tint :: Tint :: Tint :: nil, Tint)
+  | Omulhs => (Tint :: Tint :: nil, Tint)
+  | Omulhu => (Tint :: Tint :: nil, Tint)
   | Odiv => (Tint :: Tint :: nil, Tint)
   | Odivu => (Tint :: Tint :: nil, Tint)
   | Oand => (Tint :: Tint :: nil, Tint)
@@ -396,6 +402,8 @@ Proof with (try exact I).
   destruct v0...
   destruct v0; destruct v1...
   destruct v0... destruct v1... destruct v2...
+  destruct v0; destruct v1...
+  destruct v0; destruct v1...
   destruct v0; destruct v1; simpl in H0; inv H0.
     destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2...
   destruct v0; destruct v1; simpl in H0; inv H0. destruct (Int.eq i0 Int.zero); inv H2...
@@ -823,6 +831,8 @@ Proof.
 
   inv H4; inv H2; simpl; auto.
   apply Values.val_add_inject; auto. inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
+  inv H4; inv H2; simpl; auto.
   inv H4; inv H3; simpl in H1; inv H1. simpl. 
     destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. TrivialExists.
   inv H4; inv H3; simpl in H1; inv H1. simpl. 
diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml
index d700326..c0e2687 100644
--- a/arm/PrintAsm.ml
+++ b/arm/PrintAsm.ml
@@ -599,14 +599,16 @@ let print_instruction oc = function
       fprintf oc "	strb	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
   | Pstrh(r1, r2, sa) ->
       fprintf oc "	strh	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
-  | Psdiv(r1, r2, r3) ->
-      assert (r1 = IR0 && r2 = IR0 && r3 = IR1);
+  | Psdiv ->
       fprintf oc "	bl	__aeabi_idiv\n"; 1
+  | Psmull(r1, r2, r3, r4) ->
+      fprintf oc "	smull	%a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1
   | Psub(r1, r2, so) ->
       fprintf oc "	sub	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
-  | Pudiv(r1, r2, r3) ->
-      assert (r1 = IR0 && r2 = IR0 && r3 = IR1);
+  | Pudiv ->
       fprintf oc "	bl	__aeabi_uidiv\n"; 1
+  | Pumull(r1, r2, r3, r4) ->
+      fprintf oc "	umull	%a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1
   (* Floating-point coprocessor instructions *)
   | Pfcpyd(r1, r2) ->
       fprintf oc "	fcpyd	%a, %a\n" freg r1 freg r2; 1
diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp
index d81328b..4cd09d1 100644
--- a/arm/SelectOp.vp
+++ b/arm/SelectOp.vp
@@ -272,33 +272,20 @@ Definition mod_aux (divop: operation) (e1 e2: expr) :=
                            Enil) :::
                  Enil))).
 
-Definition divs (e1: expr) (e2: expr) := Eop Odiv (e1:::e2:::Enil).
-
-Definition mods := mod_aux Odiv.
-
-Definition divuimm (e: expr) (n: int) :=
-  match Int.is_power2 n with
-  | Some l => shruimm e l
-  | None   => Eop Odivu (e ::: Eop (Ointconst n) Enil ::: Enil)
-  end.
-
-Nondetfunction divu (e1: expr) (e2: expr) :=
-  match e2 with
-  | Eop (Ointconst n2) Enil => divuimm e1 n2
-  | _ => Eop Odivu (e1:::e2:::Enil)
-  end.
-
-Definition moduimm (e: expr) (n: int) :=
-  match Int.is_power2 n with
-  | Some l => Eop (Oandimm (Int.sub n Int.one)) (e ::: Enil)
-  | None   => mod_aux Odivu e (Eop (Ointconst n) Enil)
-  end.
-
-Nondetfunction modu (e1: expr) (e2: expr) :=
-  match e2 with
-  | Eop (Ointconst n2) Enil => moduimm e1 n2
-  | _ => mod_aux Odivu e1 e2
-  end.
+Definition divs_base (e1: expr) (e2: expr) := Eop Odiv (e1:::e2:::Enil).
+Definition mods_base := mod_aux Odiv.
+Definition divu_base (e1: expr) (e2: expr) := Eop Odivu (e1:::e2:::Enil).
+Definition modu_base := mod_aux Odivu.
+
+Definition shrximm (e1: expr) (n2: int) :=
+  if Int.eq n2 Int.zero
+  then e1
+  else Elet e1
+         (shrimm
+             (add (Eletvar O)
+                  (shruimm (shrimm (Eletvar O) (Int.repr 31))
+                           (Int.sub Int.iwordsize n2)))
+             n2).
 
 (** ** General shifts *)
 
diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v
index a71ead7..9beb1ad 100644
--- a/arm/SelectOpproof.v
+++ b/arm/SelectOpproof.v
@@ -468,83 +468,103 @@ Proof.
   reflexivity.
 Qed.
 
-Theorem eval_divs:
+Theorem eval_divs_base:
   forall le a b x y z,
   eval_expr ge sp e m le a x ->
   eval_expr ge sp e m le b y ->
   Val.divs x y = Some z ->
-  exists v, eval_expr ge sp e m le (divs a b) v /\ Val.lessdef z v.
+  exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros. unfold divs. exists z; split. EvalOp. auto.
+  intros. unfold divs_base. exists z; split. EvalOp. auto.
 Qed.
 
-Theorem eval_mods:
+Theorem eval_mods_base:
   forall le a b x y z,
   eval_expr ge sp e m le a x ->
   eval_expr ge sp e m le b y ->
   Val.mods x y = Some z ->
-  exists v, eval_expr ge sp e m le (mods a b) v /\ Val.lessdef z v.
+  exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros; unfold mods. 
+  intros; unfold mods_base. 
   exploit Val.mods_divs; eauto. intros [v [A B]].
   subst. econstructor; split; eauto.
   apply eval_mod_aux with (semdivop := Val.divs); auto.
 Qed.
 
-Theorem eval_divuimm:
-  forall le n a x z,
-  eval_expr ge sp e m le a x ->
-  Val.divu x (Vint n) = Some z ->
-  exists v, eval_expr ge sp e m le (divuimm a n) v /\ Val.lessdef z v.
-Proof.
-  intros; unfold divuimm. 
-  destruct (Int.is_power2 n) eqn:?. 
-  replace z with (Val.shru x (Vint i)). apply eval_shruimm; auto.
-  eapply Val.divu_pow2; eauto.
-  TrivialExists. 
-  econstructor. eauto. econstructor. EvalOp. simpl; eauto. constructor. auto.
-Qed.
-
-Theorem eval_divu:
+Theorem eval_divu_base:
   forall le a x b y z,
   eval_expr ge sp e m le a x ->
   eval_expr ge sp e m le b y ->
   Val.divu x y = Some z ->
-  exists v, eval_expr ge sp e m le (divu a b) v /\ Val.lessdef z v.
+  exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros until z. unfold divu; destruct (divu_match b); intros; InvEval.
-  eapply eval_divuimm; eauto.
-  TrivialExists. 
+  intros. unfold divu_base. exists z; split. EvalOp. auto.
 Qed.
 
-Theorem eval_moduimm:
-  forall le n a x z,
+Theorem eval_modu_base:
+  forall le a x b y z,
   eval_expr ge sp e m le a x ->
-  Val.modu x (Vint n) = Some z ->
-  exists v, eval_expr ge sp e m le (moduimm a n) v /\ Val.lessdef z v.
+  eval_expr ge sp e m le b y ->
+  Val.modu x y = Some z ->
+  exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v.
 Proof.
-  intros; unfold moduimm. 
-  destruct (Int.is_power2 n) eqn:?.
-  replace z with (Val.and x (Vint (Int.sub n Int.one))). TrivialExists.
-  eapply Val.modu_pow2; eauto.
+  intros; unfold modu_base. 
   exploit Val.modu_divu; eauto. intros [v [A B]].
   subst. econstructor; split; eauto.
   apply eval_mod_aux with (semdivop := Val.divu); auto.
-  EvalOp.
 Qed.
 
-Theorem eval_modu:
-  forall le a x b y z,
+Theorem eval_shrximm:
+  forall le a n x z,
   eval_expr ge sp e m le a x ->
-  eval_expr ge sp e m le b y ->
-  Val.modu x y = Some z ->
-  exists v, eval_expr ge sp e m le (modu a b) v /\ Val.lessdef z v.
+  Val.shrx x (Vint n) = Some z ->
+  exists v, eval_expr ge sp e m le (shrximm a n) v /\ Val.lessdef z v.
 Proof.
-  intros until y; unfold modu; case (modu_match b); intros; InvEval.
-  eapply eval_moduimm; eauto.
-  exploit Val.modu_divu; eauto. intros [v [A B]].
-  subst. econstructor; split; eauto.
-  apply eval_mod_aux with (semdivop := Val.divu); auto.
+  intros. unfold shrximm. 
+  predSpec Int.eq Int.eq_spec n Int.zero.
+- subst n. exists x; split; auto. 
+  destruct x; simpl in H0; try discriminate.
+  destruct (Int.ltu Int.zero (Int.repr 31)); inv H0.
+  replace (Int.shrx i Int.zero) with i. auto. 
+  unfold Int.shrx, Int.divs. rewrite Int.shl_zero. 
+  change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed; auto.
+- destruct x; simpl in H0; try discriminate.
+  destruct (Int.ltu n (Int.repr 31)) eqn:LT31; inv H0.
+  assert (A: eval_expr ge sp e m (Vint i :: le) (Eletvar 0) (Vint i))
+  by (constructor; auto).
+  exploit (eval_shrimm (Int.repr 31)). eexact A.
+  intros [v [B LD]]. simpl in LD. 
+  change (Int.ltu (Int.repr 31) Int.iwordsize) with true in LD. 
+  simpl in LD; inv LD.
+  exploit (eval_shruimm (Int.sub Int.iwordsize n)). eexact B.
+  intros [v [C LD]]. simpl in LD.
+  assert (RANGE: Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize = true).
+  {
+    generalize (Int.ltu_inv _ _ LT31). intros.
+    unfold Int.sub, Int.ltu. rewrite Int.unsigned_repr_wordsize. 
+    rewrite Int.unsigned_repr. apply zlt_true.
+    assert (Int.unsigned n <> 0). 
+    { red; intros; elim H1. rewrite <- (Int.repr_unsigned n). rewrite H2; reflexivity. }
+    omega. 
+    change (Int.unsigned (Int.repr 31)) with (Int.zwordsize - 1) in H0.
+    generalize Int.wordsize_max_unsigned; omega. 
+  }
+  rewrite RANGE in LD. inv LD. 
+  exploit eval_add. eexact A. eexact C. intros [v [D LD]].
+  simpl in LD. inv LD. 
+  exploit (eval_shrimm n). eexact D. intros [v [E LD]].
+  simpl in LD. 
+  assert (RANGE2: Int.ltu n Int.iwordsize = true).
+  {
+    generalize (Int.ltu_inv _ _ LT31). intros.
+    unfold Int.ltu. rewrite Int.unsigned_repr_wordsize. apply zlt_true.
+    change (Int.unsigned (Int.repr 31)) with (Int.zwordsize - 1) in H0.
+    omega.
+  }
+  rewrite RANGE2 in LD. inv LD. 
+  econstructor; split. econstructor. eassumption. eexact E. 
+  rewrite Int.shrx_shr_2 by auto.
+  auto.
 Qed.
 
 Theorem eval_shl: binary_constructor_sound shl Val.shl.
-- 
cgit v1.2.3