summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Changelog2
-rw-r--r--backend/CSE.v24
-rw-r--r--backend/CSEproof.v17
3 files changed, 36 insertions, 7 deletions
diff --git a/Changelog b/Changelog
index b8461c4..85e4cad 100644
--- a/Changelog
+++ b/Changelog
@@ -4,6 +4,8 @@ Development version
in tail-recursive style.
- Reduced memory requirements of constant propagation pass by forgetting
compile-time approximations of dead variables.
+- More aggressive common subexpression elimination across some builtin
+ function calls, esp. annotations.
Release 1.13, 2013-03-12
diff --git a/backend/CSE.v b/backend/CSE.v
index 45a804e..d381cc8 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -420,8 +420,18 @@ Module Solver := BBlock_solver(Numbering).
common subexpressions across a function call (there is a risk of
increasing too much the register pressure across the call), so we
just forget all equations and start afresh with an empty numbering.
- Finally, the remaining instructions modify neither registers nor
- the memory, so we keep the numbering unchanged. *)
+ Finally, for instructions that modify neither registers nor
+ the memory, we keep the numbering unchanged.
+
+ For builtin invocations [Ibuiltin], we have three strategies:
+- Forget all equations. This is appropriate for builtins that can be
+ turned into function calls ([EF_external], [EF_malloc], [EF_free]).
+- Forget equations involving loads but keep equations over registers.
+ This is appropriate for builtins that modify memory, e.g. [EF_memcpy].
+- Keep all equations, taking advantage of the fact that neither memory
+ nor registers are modified. This is appropriate for annotations,
+ for inlined builtin functions, and for volatile loads.
+*)
Definition transfer (f: function) (pc: node) (before: numbering) :=
match f.(fn_code)!pc with
@@ -441,7 +451,15 @@ Definition transfer (f: function) (pc: node) (before: numbering) :=
| Itailcall sig ros args =>
empty_numbering
| Ibuiltin ef args res s =>
- add_unknown (kill_loads before) res
+ match ef with
+ | EF_external _ _ | EF_malloc | EF_free | EF_inline_asm _ =>
+ empty_numbering
+ | EF_vstore _ | EF_vstore_global _ _ _ | EF_memcpy _ _ =>
+ add_unknown (kill_loads before) res
+ | EF_builtin _ _ | EF_vload _ | EF_vload_global _ _ _
+ | EF_annot _ _ | EF_annot_val _ _ =>
+ add_unknown before res
+ end
| Icond cond args ifso ifnot =>
before
| Ijumptable arg tbl =>
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index 44d23c8..8fc9407 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -307,7 +307,8 @@ Proof.
apply wf_add_store; auto.
apply wf_empty_numbering.
apply wf_empty_numbering.
- apply wf_add_unknown. apply wf_kill_equations; auto.
+ destruct e; (apply wf_empty_numbering ||
+ apply wf_add_unknown; auto; apply wf_kill_equations; auto).
Qed.
(** As a consequence, the numberings computed by the static analysis
@@ -1088,9 +1089,17 @@ Proof.
apply wt_regset_assign; auto.
rewrite H6. eapply external_call_well_typed; eauto.
eapply analysis_correct_1; eauto. simpl; auto.
- unfold transfer; rewrite H.
- apply add_unknown_satisfiable. apply wf_kill_equations. eapply wf_analyze; eauto.
- eapply kill_loads_satisfiable; eauto.
+ unfold transfer; rewrite H.
+ assert (CASE1: numbering_satisfiable ge sp (rs#res <- v) m' empty_numbering).
+ { apply empty_numbering_satisfiable. }
+ assert (CASE2: m' = m -> numbering_satisfiable ge sp (rs#res <- v) m' (add_unknown approx#pc res)).
+ { intros. rewrite H1. apply add_unknown_satisfiable.
+ eapply wf_analyze; eauto. auto. }
+ assert (CASE3: numbering_satisfiable ge sp (rs#res <- v) m'
+ (add_unknown (kill_loads approx#pc) res)).
+ { apply add_unknown_satisfiable. apply wf_kill_equations. eapply wf_analyze; eauto.
+ eapply kill_loads_satisfiable; eauto. }
+ destruct ef; auto; apply CASE2; inv H0; auto.
(* Icond *)
destruct (valnum_regs approx!!pc args) as [n1 vl] eqn:?.