1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
|
(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, INRIA Paris-Rocquencourt *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(** RTL function inlining *)
Require Import Coqlib.
Require Import Wfsimpl.
Require Import Errors.
Require Import Maps.
Require Import AST.
Require Import Integers.
Require Import Values.
Require Import Op.
Require Import Registers.
Require Import RTL.
Ltac xomega := unfold Plt, Ple in *; zify; omega.
(** ** Environment of inlinable functions *)
(** We maintain a mapping from function names to their definitions.
In this mapping, we only include functions that are eligible for
inlining, as determined by the external heuristic
[should_inline]. *)
Definition funenv : Type := PTree.t function.
Definition size_fenv (fenv: funenv) := PTree_Properties.cardinal fenv.
Parameter should_inline: ident -> function -> bool.
Definition add_fundef (fenv: funenv) (idf: ident * fundef) : funenv :=
match idf with
| (id, External ef) =>
PTree.remove id fenv
| (id, Internal f) =>
if should_inline id f
then PTree.set id f fenv
else PTree.remove id fenv
end.
Definition remove_vardef (fenv: funenv) (idv: ident * globvar unit) : funenv :=
PTree.remove (fst idv) fenv.
Definition funenv_program (p: program) : funenv :=
List.fold_left remove_vardef p.(prog_vars)
(List.fold_left add_fundef p.(prog_funct) (PTree.empty function)).
(** Resources used by a function. *)
(** Maximum PC (node number) in the CFG of a function. All nodes of
the CFG of [f] are between 1 and [max_pc_function f] (inclusive). *)
Definition max_pc_function (f: function) :=
PTree.fold (fun m pc i => Pmax m pc) f.(fn_code) 1%positive.
(** Maximum pseudo-register defined in a function. All results of
an instruction of [f], as well as all parameters of [f], are between
1 and [max_def_function] (inclusive). *)
Definition max_def_instr (i: instruction) :=
match i with
| Iop op args res s => res
| Iload chunk addr args dst s => dst
| Icall sg ros args res s => res
| Ibuiltin ef args res s => res
| _ => 1%positive
end.
Definition max_def_function (f: function) :=
Pmax
(PTree.fold (fun m pc i => Pmax m (max_def_instr i)) f.(fn_code) 1%positive)
(List.fold_left (fun m r => Pmax m r) f.(fn_params) 1%positive).
(** State monad *)
(** To construct incrementally the CFG of a function after inlining,
we use a state monad similar to that used in module [RTLgen].
It records the current state of the CFG, plus counters to generate
fresh pseudo-registers and fresh CFG nodes. It also records the
stack size needed for the inlined function. *)
Record state : Type := mkstate {
st_nextreg: positive; (**r last used pseudo-register *)
st_nextnode: positive; (**r last used CFG node *)
st_code: code; (**r current CFG *)
st_stksize: Z (**r current stack size *)
}.
(** Monotone evolution of the state. *)
Inductive sincr (s1 s2: state) : Prop :=
Sincr (NEXTREG: Ple s1.(st_nextreg) s2.(st_nextreg))
(NEXTNODE: Ple s1.(st_nextnode) s2.(st_nextnode))
(STKSIZE: s1.(st_stksize) <= s2.(st_stksize)).
Remark sincr_refl: forall s, sincr s s.
Proof.
intros; constructor; xomega.
Qed.
Lemma sincr_trans: forall s1 s2 s3, sincr s1 s2 -> sincr s2 s3 -> sincr s1 s3.
Proof.
intros. inv H; inv H0. constructor; xomega.
Qed.
(** Dependently-typed state monad, ensuring that the final state is
greater or equal (in the sense of predicate [sincr] above) than
the initial state. *)
Inductive res {A: Type} {s: state}: Type := R (x: A) (s': state) (I: sincr s s').
Definition mon (A: Type) : Type := forall (s: state), @res A s.
(** Operations on this monad. *)
Definition ret {A: Type} (x: A): mon A :=
fun s => R x s (sincr_refl s).
Definition bind {A B: Type} (x: mon A) (f: A -> mon B): mon B :=
fun s1 => match x s1 with R vx s2 I1 =>
match f vx s2 with R vy s3 I2 =>
R vy s3 (sincr_trans s1 s2 s3 I1 I2)
end
end.
Notation "'do' X <- A ; B" := (bind A (fun X => B))
(at level 200, X ident, A at level 100, B at level 200).
Definition initstate :=
mkstate 1%positive 1%positive (PTree.empty instruction) 0.
Program Definition set_instr (pc: node) (i: instruction): mon unit :=
fun s =>
R tt
(mkstate s.(st_nextreg) s.(st_nextnode) (PTree.set pc i s.(st_code)) s.(st_stksize))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Program Definition add_instr (i: instruction): mon node :=
fun s =>
let pc := Psucc s.(st_nextnode) in
R pc
(mkstate s.(st_nextreg) pc (PTree.set pc i s.(st_code)) s.(st_stksize))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Program Definition reserve_nodes (numnodes: positive): mon positive :=
fun s =>
R s.(st_nextnode)
(mkstate s.(st_nextreg) (Pplus s.(st_nextnode) numnodes) s.(st_code) s.(st_stksize))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Program Definition reserve_regs (numregs: positive): mon positive :=
fun s =>
R s.(st_nextreg)
(mkstate (Pplus s.(st_nextreg) numregs) s.(st_nextnode) s.(st_code) s.(st_stksize))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Program Definition request_stack (sz: Z): mon unit :=
fun s =>
R tt
(mkstate s.(st_nextreg) s.(st_nextnode) s.(st_code) (Zmax s.(st_stksize) sz))
_.
Next Obligation.
intros; constructor; simpl; xomega.
Qed.
Fixpoint mlist_iter2 {A B: Type} (f: A -> B -> mon unit) (l: list (A*B)): mon unit :=
match l with
| nil => ret tt
| (x,y) :: l' => do z <- f x y; mlist_iter2 f l'
end.
(** ** Inlining contexts *)
(** A context describes how to insert the CFG for a source function into
the CFG for the function after inlining:
- a source instruction at PC [n] is relocated to PC [n + ctx.(dpc)];
- all pseudo-registers of this instruction are shifted by [ctx.(dreg)];
- all stack references are shifted by [ctx.(dstk)];
- "return" instructions are transformed into "return" or "move" instructions
as governed by [ctx.(retinfo)].
*)
Record context: Type := mkcontext {
dpc: positive; (**r offset for PCs *)
dreg: positive; (**r offset for pseudo-regs *)
dstk: Z; (**r offset for stack references *)
mreg: positive; (**r max pseudo-reg number *)
mstk: Z; (**r original stack block size *)
retinfo: option(node * reg) (**r where to branch on return *)
(**r and deposit return value *)
}.
(** The following functions "shift" (relocate) PCs, registers, operations, etc. *)
Definition spc (ctx: context) (pc: node) := Pplus pc ctx.(dpc).
Definition sreg (ctx: context) (r: reg) := Pplus r ctx.(dreg).
Definition sregs (ctx: context) (rl: list reg) := List.map (sreg ctx) rl.
Definition sros (ctx: context) (ros: reg + ident) := sum_left_map (sreg ctx) ros.
Definition sop (ctx: context) (op: operation) :=
shift_stack_operation (Int.repr ctx.(dstk)) op.
Definition saddr (ctx: context) (addr: addressing) :=
shift_stack_addressing (Int.repr ctx.(dstk)) addr.
(** The initial context, used to copy the CFG of a toplevel function. *)
Definition initcontext (dpc dreg nreg: positive) (sz: Z) :=
{| dpc := dpc;
dreg := dreg;
dstk := 0;
mreg := nreg;
mstk := Zmax sz 0;
retinfo := None |}.
(** The context used to inline a call to another function. *)
Definition min_alignment (sz: Z) :=
if zle sz 1 then 1
else if zle sz 2 then 2
else if zle sz 4 then 4 else 8.
Definition callcontext (ctx: context)
(dpc dreg nreg: positive) (sz: Z)
(retpc: node) (retreg: reg) :=
{| dpc := dpc;
dreg := dreg;
dstk := align (ctx.(dstk) + ctx.(mstk)) (min_alignment sz);
mreg := nreg;
mstk := Zmax sz 0;
retinfo := Some (spc ctx retpc, sreg ctx retreg) |}.
(** The context used to inline a tail call to another function. *)
Definition tailcontext (ctx: context) (dpc dreg nreg: positive) (sz: Z) :=
{| dpc := dpc;
dreg := dreg;
dstk := align ctx.(dstk) (min_alignment sz);
mreg := nreg;
mstk := Zmax sz 0;
retinfo := ctx.(retinfo) |}.
(** ** Recursive expansion and copying of a CFG *)
(** Insert "move" instructions to copy the arguments of an inlined
function into its parameters. *)
Fixpoint add_moves (srcs dsts: list reg) (succ: node): mon node :=
match srcs, dsts with
| s1 :: sl, d1 :: dl =>
do n <- add_instr (Iop Omove (s1 :: nil) d1 succ);
add_moves sl dl n
| _, _ =>
ret succ
end.
(** To prevent infinite inlining of a recursive function, when we
inline the body of a function [f], this function is removed from the
environment of inlinable functions and therefore becomes ineligible
for inlining. This decreases the size (number of entries) of the
environment and guarantees termination. Inlining is, therefore,
presented as a well-founded recursion over the size of the environment. *)
Section EXPAND_CFG.
Variable fenv: funenv.
(** The [rec] parameter is the recursor: [rec fenv' P ctx f] copies
the body of function [f], with inline expansion within, as governed
by context [ctx]. It can only be called for function environments
[fenv'] strictly smaller than the current environment [fenv]. *)
Variable rec: forall fenv', (size_fenv fenv' < size_fenv fenv)%nat -> context -> function -> mon unit.
(** Given a register-or-symbol [ros], can we inline the corresponding call? *)
Inductive inline_decision (ros: reg + ident) : Type :=
| Cannot_inline
| Can_inline (id: ident) (f: function) (P: ros = inr reg id) (Q: fenv!id = Some f).
Program Definition can_inline (ros: reg + ident): inline_decision ros :=
match ros with
| inl r => Cannot_inline _
| inr id => match fenv!id with Some f => Can_inline _ id f _ _ | None => Cannot_inline _ end
end.
(** Inlining of a call to function [f]. An appropriate context is
created, then the CFG of [f] is recursively copied, then moves
are inserted to copy the arguments of the call to the parameters of [f]. *)
Definition inline_function (ctx: context) (id: ident) (f: function)
(P: PTree.get id fenv = Some f)
(args: list reg)
(retpc: node) (retreg: reg) : mon node :=
let npc := max_pc_function f in
let nreg := max_def_function f in
do dpc <- reserve_nodes npc;
do dreg <- reserve_regs nreg;
let ctx' := callcontext ctx dpc dreg nreg f.(fn_stacksize) retpc retreg in
do x <- rec (PTree.remove id fenv) (PTree_Properties.cardinal_remove P) ctx' f;
add_moves (sregs ctx args) (sregs ctx' f.(fn_params)) (spc ctx' f.(fn_entrypoint)).
(** Inlining of a tail call to function [f]. Similar to [inline_function],
but the new context is different. *)
Definition inline_tail_function (ctx: context) (id: ident) (f: function)
(P: PTree.get id fenv = Some f)
(args: list reg): mon node :=
let npc := max_pc_function f in
let nreg := max_def_function f in
do dpc <- reserve_nodes npc;
do dreg <- reserve_regs nreg;
let ctx' := tailcontext ctx dpc dreg nreg f.(fn_stacksize) in
do x <- rec (PTree.remove id fenv) (PTree_Properties.cardinal_remove P) ctx' f;
add_moves (sregs ctx args) (sregs ctx' f.(fn_params)) (spc ctx' f.(fn_entrypoint)).
(** The instruction generated for a [Ireturn] instruction found in an
inlined function body. *)
Definition inline_return (ctx: context) (or: option reg) (retinfo: node * reg) :=
match retinfo, or with
| (retpc, retreg), Some r => Iop Omove (sreg ctx r :: nil) retreg retpc
| (retpc, retreg), None => Inop retpc
end.
(** Expansion and copying of an instruction. For most instructions,
its registers and successor PC are shifted as per the context [ctx],
then the instruction is inserted in the final CFG at its final position
[spc ctx pc].
[Icall] instructions are either replaced by a "goto" to the expansion
of the called function, or shifted as described above.
[Itailcall] instructions are similar, with one additional case. If
the [Itailcall] occurs in the body of an inlined function, and
cannot be inlined itself, it must be turned into an [Icall]
instruction that branches to the return point of the inlined
function.
Finally, [Ireturn] instructions within an inlined function are
turned into a "move" or "goto" that stores the result, if any,
into the destination register, then branches back to the successor
of the inlined call. *)
Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
match i with
| Inop s =>
set_instr (spc ctx pc) (Inop (spc ctx s))
| Iop op args res s =>
set_instr (spc ctx pc)
(Iop (sop ctx op) (sregs ctx args) (sreg ctx res) (spc ctx s))
| Iload chunk addr args dst s =>
set_instr (spc ctx pc)
(Iload chunk (saddr ctx addr) (sregs ctx args) (sreg ctx dst) (spc ctx s))
| Istore chunk addr args src s =>
set_instr (spc ctx pc)
(Istore chunk (saddr ctx addr) (sregs ctx args) (sreg ctx src) (spc ctx s))
| Icall sg ros args res s =>
match can_inline ros with
| Cannot_inline =>
set_instr (spc ctx pc)
(Icall sg (sros ctx ros) (sregs ctx args) (sreg ctx res) (spc ctx s))
| Can_inline id f P Q =>
do n <- inline_function ctx id f Q args s res;
set_instr (spc ctx pc) (Inop n)
end
| Itailcall sg ros args =>
match can_inline ros with
| Cannot_inline =>
match ctx.(retinfo) with
| None =>
set_instr (spc ctx pc)
(Itailcall sg (sros ctx ros) (sregs ctx args))
| Some(rpc, rreg) =>
set_instr (spc ctx pc)
(Icall sg (sros ctx ros) (sregs ctx args) rreg rpc)
end
| Can_inline id f P Q =>
do n <- inline_tail_function ctx id f Q args;
set_instr (spc ctx pc) (Inop n)
end
| Ibuiltin ef args res s =>
set_instr (spc ctx pc)
(Ibuiltin ef (sregs ctx args) (sreg ctx res) (spc ctx s))
| Icond cond args s1 s2 =>
set_instr (spc ctx pc)
(Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2))
| Ijumptable r tbl =>
set_instr (spc ctx pc)
(Ijumptable (sreg ctx r) (List.map (spc ctx) tbl))
| Ireturn or =>
match ctx.(retinfo) with
| None =>
set_instr (spc ctx pc) (Ireturn (option_map (sreg ctx) or))
| Some rinfo =>
set_instr (spc ctx pc) (inline_return ctx or rinfo)
end
end.
(** The expansion of a function [f] iteratively expands all its
instructions, after recording how much stack it needs. *)
Definition expand_cfg_rec (ctx: context) (f: function): mon unit :=
do x <- request_stack (ctx.(dstk) + ctx.(mstk));
mlist_iter2 (expand_instr ctx) (PTree.elements f.(fn_code)).
End EXPAND_CFG.
(** Here we "tie the knot" of the recursion, taking the fixpoint
of [expand_cfg_rec]. *)
Definition expand_cfg := Fixm size_fenv expand_cfg_rec.
(** Start of the recursion: copy and inline function [f] in the
initial context. *)
Definition expand_function (fenv: funenv) (f: function): mon context :=
let npc := max_pc_function f in
let nreg := max_def_function f in
do dpc <- reserve_nodes npc;
do dreg <- reserve_regs nreg;
let ctx := initcontext dpc dreg nreg f.(fn_stacksize) in
do x <- expand_cfg fenv ctx f;
ret ctx.
(** ** Inlining in functions and whole programs. *)
Local Open Scope string_scope.
(** Inlining can increase the size of the function's stack block. We must
make sure that the new size does not exceed [Int.max_unsigned], otherwise
address computations within the stack would overflow and produce incorrect
results. *)
Definition transf_function (fenv: funenv) (f: function) : Errors.res function :=
let '(R ctx s _) := expand_function fenv f initstate in
if zle s.(st_stksize) Int.max_unsigned then
OK (mkfunction f.(fn_sig)
(sregs ctx f.(fn_params))
s.(st_stksize)
s.(st_code)
(spc ctx f.(fn_entrypoint)))
else
Error(msg "Inlining: stack too big").
Definition transf_fundef (fenv: funenv) (fd: fundef) : Errors.res fundef :=
AST.transf_partial_fundef (transf_function fenv) fd.
Definition transf_program (p: program): Errors.res program :=
let fenv := funenv_program p in
AST.transform_partial_program (transf_fundef fenv) p.
|