summaryrefslogtreecommitdiff
path: root/backend/Tunneling.v
blob: 18414a89afb541c4c1f4b292bc1360de35b08731 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
(* *********************************************************************)
(*                                                                     *)
(*              The Compcert verified compiler                         *)
(*                                                                     *)
(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
(*                                                                     *)
(*  Copyright Institut National de Recherche en Informatique et en     *)
(*  Automatique.  All rights reserved.  This file is distributed       *)
(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
(*                                                                     *)
(* *********************************************************************)

(** Branch tunneling (optimization of branches to branches). *)

Require Import Coqlib.
Require Import Maps.
Require Import UnionFind.
Require Import AST.
Require Import LTL.

(** Branch tunneling shortens sequences of branches (with no intervening
  computations) by rewriting the branch and conditional branch instructions
  so that they jump directly to the end of the branch sequence.
  For example:
<<
     L1: nop L2;                          L1: nop L3;
     L2; nop L3;               becomes    L2: nop L3;
     L3: instr;                           L3: instr;
     L4: if (cond) goto L1;               L4: if (cond) goto L3;
>>
  This optimization can be applied to several of our intermediate
  languages.  We choose to perform it on the [LTL] language,
  after register allocation but before code linearization.
  Register allocation can delete instructions (such as dead
  computations or useless moves), therefore there are more
  opportunities for tunneling after allocation than before.
  Symmetrically, prior tunneling helps linearization to produce
  better code, e.g. by revealing that some [nop] instructions are
  dead code (as the "nop L3" in the example above).
*)

(** The naive implementation of branch tunneling would replace
  any branch to a node [pc] by a branch to the node
  [branch_target f pc], defined as follows:
<<
  branch_target f pc = branch_target f pc'  if f(pc) = nop pc'
                     = pc                   otherwise
>>
  However, this definition can fail to terminate if
  the program can contain loops consisting only of branches, as in
<<
     L1: nop L1;
>>
  or
<<   L1: nop L2;
     L2: nop L1;
>>
  Coq warns us of this fact by not accepting the definition 
  of [branch_target] above.

  To handle this problem, we proceed in two passes.  The first pass
  populates a union-find data structure, adding equalities [pc = pc']
  for every instruction [pc: nop pc'] in the function. *)

Module U := UnionFind.UF(PTree).

Definition record_goto (uf: U.t) (pc: node) (i: instruction) : U.t :=
  match i with
  | Lnop s => U.union uf pc s
  | _ => uf
  end.

Definition record_gotos (f: LTL.function) : U.t :=
  PTree.fold record_goto f.(fn_code) U.empty.

(** The second pass rewrites all LTL instructions, replacing every
  successor [s] of every instruction by the canonical representative
  of its equivalence class in the union-find data structure. *)

Definition tunnel_instr (uf: U.t) (b: instruction) : instruction :=
  match b with
  | Lnop s =>
      Lnop (U.repr uf s)
  | Lop op args res s =>
      Lop op args res (U.repr uf s)
  | Lload chunk addr args dst s =>
      Lload chunk addr args dst (U.repr uf s)
  | Lstore chunk addr args src s =>
      Lstore chunk addr args src (U.repr uf s)
  | Lcall sig ros args res s =>
      Lcall sig ros args res (U.repr uf s)
  | Ltailcall sig ros args =>
      Ltailcall sig ros args
  | Lbuiltin ef args res s =>
      Lbuiltin ef args res (U.repr uf s)
  | Lcond cond args s1 s2 =>
      Lcond cond args (U.repr uf s1) (U.repr uf s2)
  | Ljumptable arg tbl =>
      Ljumptable arg (List.map (U.repr uf) tbl)
  | Lreturn or =>
      Lreturn or
  end.

Definition tunnel_function (f: LTL.function) : LTL.function :=
  let uf := record_gotos f in
  mkfunction
    (fn_sig f)
    (fn_params f)
    (fn_stacksize f)
    (PTree.map (fun pc b => tunnel_instr uf b) (fn_code f))
    (U.repr uf (fn_entrypoint f)).

Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
  transf_fundef tunnel_function f.

Definition tunnel_program (p: LTL.program) : LTL.program :=
  transform_program tunnel_fundef p.