From 41b7ecb127b93b1aecc29a298ec21dc94603e6fa Mon Sep 17 00:00:00 2001 From: xleroy Date: Mon, 29 Jul 2013 12:10:11 +0000 Subject: Optimize integer divisions by positive constants, turning them into multiply-high and shifts. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2300 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- arm/Asm.v | 28 ++++++++++---- arm/Asmgen.v | 18 +++++++-- arm/Asmgenproof1.v | 7 ++-- arm/ConstpropOp.vp | 3 ++ arm/Op.v | 10 +++++ arm/PrintAsm.ml | 10 +++-- arm/SelectOp.vp | 41 +++++++------------- arm/SelectOpproof.v | 108 +++++++++++++++++++++++++++++++--------------------- 8 files changed, 135 insertions(+), 90 deletions(-) (limited to 'arm') diff --git a/arm/Asm.v b/arm/Asm.v index 43a1435..76e8196 100644 --- a/arm/Asm.v +++ b/arm/Asm.v @@ -157,9 +157,11 @@ Inductive instruction : Type := | Pstr: ireg -> ireg -> shift_addr -> instruction (**r int32 store *) | Pstrb: ireg -> ireg -> shift_addr -> instruction (**r int8 store *) | Pstrh: ireg -> ireg -> shift_addr -> instruction (**r int16 store *) - | Psdiv: ireg -> ireg -> ireg -> instruction (**r signed division *) + | Psdiv: instruction (**r signed division *) + | Psmull: ireg -> ireg -> ireg -> ireg -> instruction (**r signed multiply long *) | Psub: ireg -> ireg -> shift_op -> instruction (**r integer subtraction *) - | Pudiv: ireg -> ireg -> ireg -> instruction (**r unsigned division *) + | Pudiv: instruction (**r unsigned division *) + | Pumull: ireg -> ireg -> ireg -> ireg -> instruction (**r unsigned multiply long *) (* Floating-point coprocessor instructions (VFP double scalar operations) *) | Pfcpyd: freg -> freg -> instruction (**r float move *) | Pfabsd: freg -> freg -> instruction (**r float absolute value *) @@ -559,18 +561,28 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out exec_store Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m | Pstrh r1 r2 sa => exec_store Mint16unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m - | Psdiv rd r1 r2 => - match Val.divs rs#r1 rs#r2 with - | Some v => Next (nextinstr (rs#rd <- v)) m + | Psdiv => + match Val.divs rs#IR0 rs#IR1 with + | Some v => Next (nextinstr (rs#IR0 <- v + #IR1 <- Vundef #IR2 <- Vundef + #IR3 <- Vundef #IR12 <- Vundef)) m | None => Stuck end + | Psmull rdl rdh r1 r2 => + Next (nextinstr (rs#rdl <- (Val.mul rs#r1 rs#r2) + #rdh <- (Val.mulhs rs#r1 rs#r2))) m | Psub r1 r2 so => Next (nextinstr (rs#r1 <- (Val.sub rs#r2 (eval_shift_op so rs)))) m - | Pudiv rd r1 r2 => - match Val.divu rs#r1 rs#r2 with - | Some v => Next (nextinstr (rs#rd <- v)) m + | Pudiv => + match Val.divu rs#IR0 rs#IR1 with + | Some v => Next (nextinstr (rs#IR0 <- v + #IR1 <- Vundef #IR2 <- Vundef + #IR3 <- Vundef #IR12 <- Vundef)) m | None => Stuck end + | Pumull rdl rdh r1 r2 => + Next (nextinstr (rs#rdl <- (Val.mul rs#r1 rs#r2) + #rdh <- (Val.mulhu rs#r1 rs#r2))) m (* Floating-point coprocessor instructions *) | Pfcpyd r1 r2 => Next (nextinstr (rs#r1 <- (rs#r2))) m diff --git a/arm/Asmgen.v b/arm/Asmgen.v index 6645149..3707b7f 100644 --- a/arm/Asmgen.v +++ b/arm/Asmgen.v @@ -299,12 +299,22 @@ Definition transl_op OK (if negb (ireg_eq r r1) then Pmla r r1 r2 r3 :: k else if negb (ireg_eq r r2) then Pmla r r2 r1 r3 :: k else Pmla IR14 r1 r2 r3 :: Pmov r (SOreg IR14) :: k) - | Odiv, a1 :: a2 :: nil => + | Omulhs, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Psdiv r r1 r2 :: k) - | Odivu, a1 :: a2 :: nil => + OK (Psmull IR14 r r1 r2 :: k) + | Omulhu, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pudiv r r1 r2 :: k) + OK (Pumull IR14 r r1 r2 :: k) + | Odiv, a1 :: a2 :: nil => + assertion (mreg_eq res R0); + assertion (mreg_eq a1 R0); + assertion (mreg_eq a2 R1); + OK (Psdiv :: k) + | Odivu, a1 :: a2 :: nil => + assertion (mreg_eq res R0); + assertion (mreg_eq a1 R0); + assertion (mreg_eq a2 R1); + OK (Pudiv :: k) | Oand, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Pand r r1 (SOreg r2) :: k) diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v index d77006a..21d2b73 100644 --- a/arm/Asmgenproof1.v +++ b/arm/Asmgenproof1.v @@ -942,11 +942,12 @@ Proof. eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. intuition Simpl. (* divs *) - econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto. - intuition Simpl. +Local Transparent destroyed_by_op. + econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto. + split. Simpl. simpl; intros. intuition Simpl. (* divu *) econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto. - intuition Simpl. + split. Simpl. simpl; intros. intuition Simpl. (* Oandimm *) generalize (andimm_correct x x0 i k rs m). intros [rs' [A [B C]]]. diff --git a/arm/ConstpropOp.vp b/arm/ConstpropOp.vp index 9bf066b..caf0da6 100644 --- a/arm/ConstpropOp.vp +++ b/arm/ConstpropOp.vp @@ -112,6 +112,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) := | Orsubshift s, I n1 :: I n2 :: nil => I(Int.sub (eval_static_shift s n2) n1) | Orsubimm n, I n1 :: nil => I (Int.sub n n1) | Omul, I n1 :: I n2 :: nil => I(Int.mul n1 n2) + | Omla, I n1 :: I n2 :: I n3 :: nil => I(Int.add (Int.mul n1 n2) n3) + | Omulhs, I n1 :: I n2 :: nil => I(Int.mulhs n1 n2) + | Omulhu, I n1 :: I n2 :: nil => I(Int.mulhu n1 n2) | Odiv, I n1 :: I n2 :: nil => if Int.eq n2 Int.zero then Unknown else if Int.eq n1 (Int.repr Int.min_signed) && Int.eq n2 Int.mone then Unknown diff --git a/arm/Op.v b/arm/Op.v index af3ccdb..fe97d36 100644 --- a/arm/Op.v +++ b/arm/Op.v @@ -82,6 +82,8 @@ Inductive operation : Type := | Orsubimm: int -> operation (**r [rd = n - r1] *) | Omul: operation (**r [rd = r1 * r2] *) | Omla: operation (**r [rd = r1 * r2 + r3] *) + | Omulhs: operation (**r [rd = high part of r1 * r2, signed] *) + | Omulhu: operation (**r [rd = high part of r1 * r2, unsigned] *) | Odiv: operation (**r [rd = r1 / r2] (signed) *) | Odivu: operation (**r [rd = r1 / r2] (unsigned) *) | Oand: operation (**r [rd = r1 & r2] *) @@ -226,6 +228,8 @@ Definition eval_operation | Orsubimm n, v1 :: nil => Some (Val.sub (Vint n) v1) | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2) | Omla, v1 :: v2 :: v3 :: nil => Some (Val.add (Val.mul v1 v2) v3) + | Omulhs, v1::v2::nil => Some (Val.mulhs v1 v2) + | Omulhu, v1::v2::nil => Some (Val.mulhu v1 v2) | Odiv, v1 :: v2 :: nil => Val.divs v1 v2 | Odivu, v1 :: v2 :: nil => Val.divu v1 v2 | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2) @@ -319,6 +323,8 @@ Definition type_of_operation (op: operation) : list typ * typ := | Orsubimm _ => (Tint :: nil, Tint) | Omul => (Tint :: Tint :: nil, Tint) | Omla => (Tint :: Tint :: Tint :: nil, Tint) + | Omulhs => (Tint :: Tint :: nil, Tint) + | Omulhu => (Tint :: Tint :: nil, Tint) | Odiv => (Tint :: Tint :: nil, Tint) | Odivu => (Tint :: Tint :: nil, Tint) | Oand => (Tint :: Tint :: nil, Tint) @@ -396,6 +402,8 @@ Proof with (try exact I). destruct v0... destruct v0; destruct v1... destruct v0... destruct v1... destruct v2... + destruct v0; destruct v1... + destruct v0; destruct v1... destruct v0; destruct v1; simpl in H0; inv H0. destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2... destruct v0; destruct v1; simpl in H0; inv H0. destruct (Int.eq i0 Int.zero); inv H2... @@ -823,6 +831,8 @@ Proof. inv H4; inv H2; simpl; auto. apply Values.val_add_inject; auto. inv H4; inv H2; simpl; auto. + inv H4; inv H2; simpl; auto. + inv H4; inv H2; simpl; auto. inv H4; inv H3; simpl in H1; inv H1. simpl. destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. TrivialExists. inv H4; inv H3; simpl in H1; inv H1. simpl. diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml index d700326..c0e2687 100644 --- a/arm/PrintAsm.ml +++ b/arm/PrintAsm.ml @@ -599,14 +599,16 @@ let print_instruction oc = function fprintf oc " strb %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 | Pstrh(r1, r2, sa) -> fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 - | Psdiv(r1, r2, r3) -> - assert (r1 = IR0 && r2 = IR0 && r3 = IR1); + | Psdiv -> fprintf oc " bl __aeabi_idiv\n"; 1 + | Psmull(r1, r2, r3, r4) -> + fprintf oc " smull %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1 | Psub(r1, r2, so) -> fprintf oc " sub %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 - | Pudiv(r1, r2, r3) -> - assert (r1 = IR0 && r2 = IR0 && r3 = IR1); + | Pudiv -> fprintf oc " bl __aeabi_uidiv\n"; 1 + | Pumull(r1, r2, r3, r4) -> + fprintf oc " umull %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1 (* Floating-point coprocessor instructions *) | Pfcpyd(r1, r2) -> fprintf oc " fcpyd %a, %a\n" freg r1 freg r2; 1 diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp index d81328b..4cd09d1 100644 --- a/arm/SelectOp.vp +++ b/arm/SelectOp.vp @@ -272,33 +272,20 @@ Definition mod_aux (divop: operation) (e1 e2: expr) := Enil) ::: Enil))). -Definition divs (e1: expr) (e2: expr) := Eop Odiv (e1:::e2:::Enil). - -Definition mods := mod_aux Odiv. - -Definition divuimm (e: expr) (n: int) := - match Int.is_power2 n with - | Some l => shruimm e l - | None => Eop Odivu (e ::: Eop (Ointconst n) Enil ::: Enil) - end. - -Nondetfunction divu (e1: expr) (e2: expr) := - match e2 with - | Eop (Ointconst n2) Enil => divuimm e1 n2 - | _ => Eop Odivu (e1:::e2:::Enil) - end. - -Definition moduimm (e: expr) (n: int) := - match Int.is_power2 n with - | Some l => Eop (Oandimm (Int.sub n Int.one)) (e ::: Enil) - | None => mod_aux Odivu e (Eop (Ointconst n) Enil) - end. - -Nondetfunction modu (e1: expr) (e2: expr) := - match e2 with - | Eop (Ointconst n2) Enil => moduimm e1 n2 - | _ => mod_aux Odivu e1 e2 - end. +Definition divs_base (e1: expr) (e2: expr) := Eop Odiv (e1:::e2:::Enil). +Definition mods_base := mod_aux Odiv. +Definition divu_base (e1: expr) (e2: expr) := Eop Odivu (e1:::e2:::Enil). +Definition modu_base := mod_aux Odivu. + +Definition shrximm (e1: expr) (n2: int) := + if Int.eq n2 Int.zero + then e1 + else Elet e1 + (shrimm + (add (Eletvar O) + (shruimm (shrimm (Eletvar O) (Int.repr 31)) + (Int.sub Int.iwordsize n2))) + n2). (** ** General shifts *) diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v index a71ead7..9beb1ad 100644 --- a/arm/SelectOpproof.v +++ b/arm/SelectOpproof.v @@ -468,83 +468,103 @@ Proof. reflexivity. Qed. -Theorem eval_divs: +Theorem eval_divs_base: forall le a b x y z, eval_expr ge sp e m le a x -> eval_expr ge sp e m le b y -> Val.divs x y = Some z -> - exists v, eval_expr ge sp e m le (divs a b) v /\ Val.lessdef z v. + exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v. Proof. - intros. unfold divs. exists z; split. EvalOp. auto. + intros. unfold divs_base. exists z; split. EvalOp. auto. Qed. -Theorem eval_mods: +Theorem eval_mods_base: forall le a b x y z, eval_expr ge sp e m le a x -> eval_expr ge sp e m le b y -> Val.mods x y = Some z -> - exists v, eval_expr ge sp e m le (mods a b) v /\ Val.lessdef z v. + exists v, eval_expr ge sp e m le (mods_base a b) v /\ Val.lessdef z v. Proof. - intros; unfold mods. + intros; unfold mods_base. exploit Val.mods_divs; eauto. intros [v [A B]]. subst. econstructor; split; eauto. apply eval_mod_aux with (semdivop := Val.divs); auto. Qed. -Theorem eval_divuimm: - forall le n a x z, - eval_expr ge sp e m le a x -> - Val.divu x (Vint n) = Some z -> - exists v, eval_expr ge sp e m le (divuimm a n) v /\ Val.lessdef z v. -Proof. - intros; unfold divuimm. - destruct (Int.is_power2 n) eqn:?. - replace z with (Val.shru x (Vint i)). apply eval_shruimm; auto. - eapply Val.divu_pow2; eauto. - TrivialExists. - econstructor. eauto. econstructor. EvalOp. simpl; eauto. constructor. auto. -Qed. - -Theorem eval_divu: +Theorem eval_divu_base: forall le a x b y z, eval_expr ge sp e m le a x -> eval_expr ge sp e m le b y -> Val.divu x y = Some z -> - exists v, eval_expr ge sp e m le (divu a b) v /\ Val.lessdef z v. + exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v. Proof. - intros until z. unfold divu; destruct (divu_match b); intros; InvEval. - eapply eval_divuimm; eauto. - TrivialExists. + intros. unfold divu_base. exists z; split. EvalOp. auto. Qed. -Theorem eval_moduimm: - forall le n a x z, +Theorem eval_modu_base: + forall le a x b y z, eval_expr ge sp e m le a x -> - Val.modu x (Vint n) = Some z -> - exists v, eval_expr ge sp e m le (moduimm a n) v /\ Val.lessdef z v. + eval_expr ge sp e m le b y -> + Val.modu x y = Some z -> + exists v, eval_expr ge sp e m le (modu_base a b) v /\ Val.lessdef z v. Proof. - intros; unfold moduimm. - destruct (Int.is_power2 n) eqn:?. - replace z with (Val.and x (Vint (Int.sub n Int.one))). TrivialExists. - eapply Val.modu_pow2; eauto. + intros; unfold modu_base. exploit Val.modu_divu; eauto. intros [v [A B]]. subst. econstructor; split; eauto. apply eval_mod_aux with (semdivop := Val.divu); auto. - EvalOp. Qed. -Theorem eval_modu: - forall le a x b y z, +Theorem eval_shrximm: + forall le a n x z, eval_expr ge sp e m le a x -> - eval_expr ge sp e m le b y -> - Val.modu x y = Some z -> - exists v, eval_expr ge sp e m le (modu a b) v /\ Val.lessdef z v. + Val.shrx x (Vint n) = Some z -> + exists v, eval_expr ge sp e m le (shrximm a n) v /\ Val.lessdef z v. Proof. - intros until y; unfold modu; case (modu_match b); intros; InvEval. - eapply eval_moduimm; eauto. - exploit Val.modu_divu; eauto. intros [v [A B]]. - subst. econstructor; split; eauto. - apply eval_mod_aux with (semdivop := Val.divu); auto. + intros. unfold shrximm. + predSpec Int.eq Int.eq_spec n Int.zero. +- subst n. exists x; split; auto. + destruct x; simpl in H0; try discriminate. + destruct (Int.ltu Int.zero (Int.repr 31)); inv H0. + replace (Int.shrx i Int.zero) with i. auto. + unfold Int.shrx, Int.divs. rewrite Int.shl_zero. + change (Int.signed Int.one) with 1. rewrite Z.quot_1_r. rewrite Int.repr_signed; auto. +- destruct x; simpl in H0; try discriminate. + destruct (Int.ltu n (Int.repr 31)) eqn:LT31; inv H0. + assert (A: eval_expr ge sp e m (Vint i :: le) (Eletvar 0) (Vint i)) + by (constructor; auto). + exploit (eval_shrimm (Int.repr 31)). eexact A. + intros [v [B LD]]. simpl in LD. + change (Int.ltu (Int.repr 31) Int.iwordsize) with true in LD. + simpl in LD; inv LD. + exploit (eval_shruimm (Int.sub Int.iwordsize n)). eexact B. + intros [v [C LD]]. simpl in LD. + assert (RANGE: Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize = true). + { + generalize (Int.ltu_inv _ _ LT31). intros. + unfold Int.sub, Int.ltu. rewrite Int.unsigned_repr_wordsize. + rewrite Int.unsigned_repr. apply zlt_true. + assert (Int.unsigned n <> 0). + { red; intros; elim H1. rewrite <- (Int.repr_unsigned n). rewrite H2; reflexivity. } + omega. + change (Int.unsigned (Int.repr 31)) with (Int.zwordsize - 1) in H0. + generalize Int.wordsize_max_unsigned; omega. + } + rewrite RANGE in LD. inv LD. + exploit eval_add. eexact A. eexact C. intros [v [D LD]]. + simpl in LD. inv LD. + exploit (eval_shrimm n). eexact D. intros [v [E LD]]. + simpl in LD. + assert (RANGE2: Int.ltu n Int.iwordsize = true). + { + generalize (Int.ltu_inv _ _ LT31). intros. + unfold Int.ltu. rewrite Int.unsigned_repr_wordsize. apply zlt_true. + change (Int.unsigned (Int.repr 31)) with (Int.zwordsize - 1) in H0. + omega. + } + rewrite RANGE2 in LD. inv LD. + econstructor; split. econstructor. eassumption. eexact E. + rewrite Int.shrx_shr_2 by auto. + auto. Qed. Theorem eval_shl: binary_constructor_sound shl Val.shl. -- cgit v1.2.3