(* *********************************************************************)
(*                                                                     *)
(*              The Compcert verified compiler                         *)
(*                                                                     *)
(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
(*                                                                     *)
(*  Copyright Institut National de Recherche en Informatique et en     *)
(*  Automatique.  All rights reserved.  This file is distributed       *)
(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
(*                                                                     *)
(* *********************************************************************)

(* Register allocation by coloring of an interference graph *)

(* The algorithm in a nutshell:

   - Split live ranges
   - Convert from RTL to XTL
   - Eliminate dead code
   - Repeat:
       . Construct interference graph
       . Color interference graph using IRC algorithm
       . Check for variables that were spilled and must be in registers
       . If none, convert to LTL and exit.
       . If some, insert spill and reload instructions and try again
     End Repeat
*)

open Printf
open Clflags
open Camlcoq
open Datatypes
open Coqlib
open Maps
open AST
open Memdata
open Kildall
open Registers
open Op
open Machregs
open Locations
open Conventions1
open Conventions
open IRC
open XTL

(* Detection of 2-address operations *)

let is_two_address op args =
  if two_address_op op then
    match args with
    | [] -> assert false
    | arg1 :: argl -> Some(arg1, argl)
  else None

(* For tracing *)

let destination_alloctrace : string option ref = ref None
let pp = ref stdout

let init_trace () =
  if !option_dalloctrace && !pp == stdout then begin
    match !destination_alloctrace with
    | None -> ()  (* should not happen *)
    | Some f -> pp := open_out f
  end


(**************** Initial conversion from RTL to XTL **************)

let vreg tyenv r = V(r, tyenv r)

let vregs tyenv rl = List.map (vreg tyenv) rl

let rec expand_regs tyenv = function
  | [] -> []
  | r :: rl ->
      match tyenv r with
      | Tlong -> V(r, Tint) :: V(twin_reg r, Tint) :: expand_regs tyenv rl
      | ty    -> V(r, ty) :: expand_regs tyenv rl

let constrain_reg v c =
  match c with
  | None -> v
  | Some mr -> L(R mr)

let rec constrain_regs vl cl =
  match vl, cl with
  | [], _ -> []
  | v1 :: vl', [] -> vl
  | v1 :: vl', Some mr1 :: cl' -> L(R mr1) :: constrain_regs vl' cl'
  | v1 :: vl', None :: cl' -> v1 :: constrain_regs vl' cl'

let move v1 v2 k =
  if v1 = v2 then
    k
  else if is_stack_reg v1 then begin
    let t = new_temp (typeof v2) in Xmove(v1, t) :: Xmove(t, v2) :: k
  end else if is_stack_reg v2 then begin
    let t = new_temp (typeof v1) in Xmove(v1, t) :: Xmove(t, v2) :: k
  end else
    Xmove(v1, v2) :: k

let rec movelist vl1 vl2 k =
  match vl1, vl2 with
  | [], [] -> k
  | v1 :: vl1, v2 :: vl2 -> move v1 v2 (movelist vl1 vl2 k)
  | _, _ -> assert false

let xparmove srcs dsts k =
  assert (List.length srcs = List.length dsts);
  match srcs, dsts with
  | [], [] -> k
  | [src], [dst] -> move src dst k
  | _, _ -> Xparmove(srcs, dsts, new_temp Tint, new_temp Tfloat) :: k

(* Return the XTL basic block corresponding to the given RTL instruction.
   Move and parallel move instructions are introduced to honor calling
   conventions and register constraints on some operations.
   64-bit integer variables are split in two 32-bit halves. *)

let block_of_RTL_instr funsig tyenv = function
  | RTL.Inop s ->
      [Xbranch s]
  | RTL.Iop(Omove, [arg], res, s) ->
      if tyenv arg = Tlong then
        [Xmove(V(arg, Tint), V(res, Tint));
         Xmove(V(twin_reg arg, Tint), V(twin_reg res, Tint));
         Xbranch s]
      else
        [Xmove(vreg tyenv arg, vreg tyenv res); Xbranch s]
  | RTL.Iop(Omakelong, [arg1; arg2], res, s) ->
      [Xmove(V(arg1, Tint), V(res, Tint));
       Xmove(V(arg2, Tint), V(twin_reg res, Tint));
       Xbranch s]
  | RTL.Iop(Olowlong, [arg], res, s) ->
      [Xmove(V(twin_reg arg, Tint), V(res, Tint)); Xbranch s]
  | RTL.Iop(Ohighlong, [arg], res, s) ->
      [Xmove(V(arg, Tint), V(res, Tint)); Xbranch s]
  | RTL.Iop(op, args, res, s) ->
      let (cargs, cres) = mregs_for_operation op in
      let args1 = vregs tyenv args and res1 = vreg tyenv res in
      let args2 = constrain_regs args1 cargs and res2 = constrain_reg res1 cres in
      let (args3, res3) =
        match is_two_address op args2 with
        | None ->
            (args2, res2)
        | Some(arg, args2') ->
            if arg = res2 || not (List.mem res2 args2') then
              (args2, res2)
            else
              let t = new_temp (tyenv res) in (t :: args2', t) in
      movelist args1 args3 (Xop(op, args3, res3) :: move res3 res1 [Xbranch s])
  | RTL.Iload(chunk, addr, args, dst, s) ->
      if chunk = Mint64 then begin
        match offset_addressing addr (coqint_of_camlint 4l) with
        | None -> assert false
        | Some addr' ->
            [Xload(Mint32, addr, vregs tyenv args,
                   V((if Archi.big_endian then dst else twin_reg dst), Tint));
             Xload(Mint32, addr', vregs tyenv args,
                   V((if Archi.big_endian then twin_reg dst else dst), Tint));
             Xbranch s]
      end else
        [Xload(chunk, addr, vregs tyenv args, vreg tyenv dst); Xbranch s]
  | RTL.Istore(chunk, addr, args, src, s) ->
      if chunk = Mint64 then begin
        match offset_addressing addr (coqint_of_camlint 4l) with
        | None -> assert false
        | Some addr' ->
            [Xstore(Mint32, addr, vregs tyenv args,
                   V((if Archi.big_endian then src else twin_reg src), Tint));
             Xstore(Mint32, addr', vregs tyenv args,
                   V((if Archi.big_endian then twin_reg src else src), Tint));
             Xbranch s]
      end else
        [Xstore(chunk, addr, vregs tyenv args, vreg tyenv src); Xbranch s]
  | RTL.Icall(sg, ros, args, res, s) ->
      let args' = vlocs (loc_arguments sg)
      and res' = vmregs (loc_result sg) in
      xparmove (expand_regs tyenv args) args'
       (Xcall(sg, sum_left_map (vreg tyenv) ros, args', res') ::
         xparmove res' (expand_regs tyenv [res]) 
           [Xbranch s])
  | RTL.Itailcall(sg, ros, args) ->
      let args' = vlocs (loc_arguments sg) in
      xparmove (expand_regs tyenv args) args'
        [Xtailcall(sg, sum_left_map (vreg tyenv) ros, args')]
  | RTL.Ibuiltin(ef, args, res, s) ->
      let (cargs, cres) = mregs_for_builtin ef in
      let args1 = expand_regs tyenv args and res1 = expand_regs tyenv [res] in
      let args2 = constrain_regs args1 cargs and res2 = constrain_regs res1 cres in
      movelist args1 args2
         (Xbuiltin(ef, args2, res2) :: movelist res2 res1 [Xbranch s])
  | RTL.Icond(cond, args, s1, s2) ->
      [Xcond(cond, vregs tyenv args, s1, s2)]
  | RTL.Ijumptable(arg, tbl) ->
      [Xjumptable(vreg tyenv arg, tbl)]
  | RTL.Ireturn None ->
      [Xreturn []]
  | RTL.Ireturn (Some arg) ->
      let args' = vmregs (loc_result funsig) in
      xparmove (expand_regs tyenv [arg]) args' [Xreturn args']

(* One above the [pc] nodes of the given RTL function *)

let next_pc f =
  PTree.fold
    (fun npc pc i -> if P.lt pc npc then npc else P.succ pc)
    f.RTL.fn_code P.one

(* Translate an RTL function to an XTL function *)

let function_of_RTL_function f tyenv =
  let xc = PTree.map1 (block_of_RTL_instr f.RTL.fn_sig tyenv) f.RTL.fn_code in
  (* Add moves for function parameters *)
  let pc_entrypoint = next_pc f in
  let b_entrypoint = 
     xparmove (vlocs (loc_parameters f.RTL.fn_sig))
              (expand_regs tyenv f.RTL.fn_params)
              [Xbranch f.RTL.fn_entrypoint] in
  { fn_sig = f.RTL.fn_sig;
    fn_stacksize = f.RTL.fn_stacksize;
    fn_entrypoint = pc_entrypoint;
    fn_code = PTree.set pc_entrypoint b_entrypoint xc }


(***************** Liveness analysis *****************)

let vset_removelist vl after = List.fold_right VSet.remove vl after
let vset_addlist vl after = List.fold_right VSet.add vl after
let vset_addros vos after =
  match vos with Coq_inl v -> VSet.add v after | Coq_inr id -> after

let live_before instr after =
  match instr with
  | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) ->
      if VSet.mem dst after || is_stack_reg src
      then VSet.add src (VSet.remove dst after)
      else after
  | Xparmove(srcs, dsts, itmp, ftmp) ->
      vset_addlist srcs (vset_removelist dsts after)
  | Xop(op, args, res) ->
      if VSet.mem res after
      then vset_addlist args (VSet.remove res after)
      else after
  | Xload(chunk, addr, args, dst) ->
      if VSet.mem dst after
      then vset_addlist args (VSet.remove dst after)
      else after
  | Xstore(chunk, addr, args, src) ->
      vset_addlist args (VSet.add src after)
  | Xcall(sg, ros, args, res) ->
      vset_addlist args (vset_addros ros (vset_removelist res after))
  | Xtailcall(sg, ros, args) ->
      vset_addlist args (vset_addros ros VSet.empty)
  | Xbuiltin(ef, args, res) ->
      vset_addlist args (vset_removelist res after)
  | Xbranch s ->
      after
  | Xcond(cond, args, s1, s2) ->
      List.fold_right VSet.add args after
  | Xjumptable(arg, tbl) ->
      VSet.add arg after
  | Xreturn args ->
      vset_addlist args VSet.empty

let rec live_before_block blk after =
  match blk with
  | [] -> after
  | instr :: blk -> live_before instr (live_before_block blk after)

let transfer_live f pc after =
  match PTree.get pc f.fn_code with
  | None -> VSet.empty
  | Some blk -> live_before_block blk after

module VSetLat = struct
  type t = VSet.t
  let beq = VSet.equal
  let bot = VSet.empty
  let lub = VSet.union
end

module Liveness_Solver = Backward_Dataflow_Solver(VSetLat)(NodeSetBackward)

let liveness_analysis f =
  match Liveness_Solver.fixpoint f.fn_code successors_block (transfer_live f) with
  | None -> assert false
  | Some lv -> lv

(* Pair the instructions of a block with their live-before sets *)

let pair_block_live blk after =
  let rec pair_rec accu after = function
  | [] -> accu
  | instr :: blk ->
      let before = live_before instr after in
      pair_rec ((instr, before) :: accu) before blk in
  pair_rec [] after (List.rev blk)


(**************** Dead code elimination **********************)

(* Eliminate pure instructions whose results are not used later. *)

let rec dce_parmove srcs dsts after =
  match srcs, dsts with
  | [], [] -> [], []
  | src1 :: srcs, dst1 :: dsts ->
      let (srcs', dsts') = dce_parmove srcs dsts after in
      if VSet.mem dst1 after || is_stack_reg src1
      then (src1 :: srcs', dst1 :: dsts')
      else (srcs', dsts')
  | _, _ -> assert false

let dce_instr instr after k =
  match instr with
  | Xmove(src, dst) ->
      if VSet.mem dst after || is_stack_reg src
      then instr :: k
      else k
  | Xparmove(srcs, dsts, itmp, ftmp) ->
      begin match dce_parmove srcs dsts after with
      | ([], []) -> k
      | ([src], [dst]) -> Xmove(src, dst) :: k
      | (srcs', dsts') -> Xparmove(srcs', dsts', itmp, ftmp) :: k
      end
  | Xop(op, args, res) ->
      if VSet.mem res after
      then instr :: k
      else k
  | Xload(chunk, addr, args, dst) ->
      if VSet.mem dst after
      then instr :: k
      else k
  | _ ->
      instr :: k

let rec dce_block blk after =
  match blk with
  | [] -> (after, [])
  | instr :: blk' ->
      let (after', tblk') = dce_block blk' after in
      (live_before instr after', dce_instr instr after' tblk')

let dead_code_elimination f liveness =
  { f with fn_code =
      PTree.map (fun pc blk -> snd(dce_block blk (PMap.get pc liveness)))
                 f.fn_code }


(*********************** Spill costs ****************************)

(* Estimate spill costs and count other statistics for every variable.
   Variables that must not be spilled are given infinite costs. *)

let spill_costs f =

  let costs = ref PTree.empty in

  let get_stats r =
    match PTree.get r !costs with
    | Some st -> st
    | None ->
        let st = {cost = 0; usedefs = 0} in
        costs := PTree.set r st !costs;
        st in

  let charge amount uses v =
    match v with
    | L l -> ()
    | V(r, ty) ->
        let st = get_stats r in
        if st.cost < 0 then
          (* the variable must be spilled, don't change its cost *)
          assert (amount < max_int)
        else begin
          (* saturating addition *)
          let c1 = st.cost + amount in
          let c2 = if c1 >= 0 then c1 else max_int (* overflow *) in
          st.cost <- c2
        end;           
        st.usedefs <- st.usedefs + uses in

  let charge_list amount uses vl =
    List.iter (charge amount uses) vl in

  let charge_ros amount ros =
    match ros with Coq_inl v -> charge amount 1 v | Coq_inr id -> () in

  let force_stack_allocation v =
    match v with
    | L l -> ()
    | V(r, ty) ->
        let st = get_stats r in
        assert (st.cost < max_int);
        st.cost <- (-1) in

  let charge_instr = function
    | Xmove(src, dst) ->
        if is_stack_reg src then
          force_stack_allocation dst
        else if is_stack_reg dst then
          force_stack_allocation src
        else begin
          charge 1 1 src; charge 1 1 dst
        end
    | Xreload(src, dst) ->
        charge 1 1 src; charge max_int 1 dst
        (* dest must not be spilled! *)
    | Xspill(src, dst) ->
        charge max_int 1 src; charge 1 1 dst
        (* source must not be spilled! *)
    | Xparmove(srcs, dsts, itmp, ftmp) ->
        charge_list 1 1 srcs; charge_list 1 1 dsts;
        charge max_int 0 itmp; charge max_int 0 ftmp
        (* temps must not be spilled *)
    | Xop(op, args, res) ->
        charge_list 10 1 args; charge 10 1 res
    | Xload(chunk, addr, args, dst) ->
        charge_list 10 1 args; charge 10 1 dst
    | Xstore(chunk, addr, args, src) ->
        charge_list 10 1 args; charge 10 1 src
    | Xcall(sg, vos, args, res) ->
        charge_ros 10 vos
    | Xtailcall(sg, vos, args) ->
        charge_ros 10 vos
    | Xbuiltin(ef, args, res) ->
        begin match ef with
        | EF_vstore _ | EF_vstore_global _ | EF_memcpy _ ->
            (* result is not used but should not be spilled *)
            charge_list 10 1 args; charge_list max_int 0 res
        | EF_annot _ ->
            (* arguments are not actually used, so don't charge;
               result is never used but should not be spilled *)
            charge_list max_int 0 res
        | EF_annot_val _ ->
            (* like a move *)
            charge_list 1 1 args; charge_list 1 1 res
        | _ ->
            charge_list 10 1 args; charge_list 10 1 res
        end
    | Xbranch _ -> ()
    | Xcond(cond, args, _, _) ->
        charge_list 10 1 args
    | Xjumptable(arg, _) ->
        charge 10 1 arg
    | Xreturn optarg ->
        () in

  let charge_block blk = List.iter charge_instr blk in

  PTree.fold1
    (fun () blk -> charge_block blk)
    f.fn_code ();
  if !option_dalloctrace then begin
    fprintf !pp "------------------ Unspillable variables --------------\n\n";
    PTree.fold
      (fun () r st ->
        if st.cost = max_int then fprintf !pp "x%d " (P.to_int r))
      !costs ();
    fprintf !pp "\n\n"
  end;
  (* Result is cost function: pseudoreg -> stats *)
  get_stats


(********* Construction and coloring of the interference graph **************)

let add_interfs_def g res live =
  VSet.iter (fun v -> if v <> res then IRC.add_interf g v res) live

let add_interfs_move g src dst live =
  VSet.iter (fun v -> if v <> src && v <> dst then IRC.add_interf g v dst) live

let add_interfs_destroyed g live mregs =
  List.iter
    (fun mr -> VSet.iter (IRC.add_interf g (L (R mr))) live)
    mregs

let add_interfs_live g live v =
  VSet.iter (fun v' -> IRC.add_interf g v v') live

let add_interfs_list g v vl =
  List.iter (IRC.add_interf g v) vl

let rec add_interfs_pairwise g = function
  | [] -> ()
  | v1 :: vl -> add_interfs_list g v1 vl; add_interfs_pairwise g vl

let add_interfs_instr g instr live =
  match instr with
  | Xmove(src, dst) | Xspill(src, dst) | Xreload(src, dst) ->
      IRC.add_pref g src dst;
      add_interfs_move g src dst live
  | Xparmove(srcs, dsts, itmp, ftmp) ->
      List.iter2 (IRC.add_pref g) srcs dsts;
      (* Interferences with live across *)
      let across = vset_removelist dsts live in
      List.iter (add_interfs_live g across) dsts;
      add_interfs_live g across itmp; add_interfs_live g across ftmp;
      (* All destinations must be pairwise different *)
      add_interfs_pairwise g dsts;
      (* The temporaries must be different from sources and dests *)
      add_interfs_list g itmp srcs; add_interfs_list g itmp dsts;
      add_interfs_list g ftmp srcs; add_interfs_list g ftmp dsts;
      (* Take into account destroyed reg when accessing Incoming param *)
      if List.exists (function (L(S(Incoming, _, _))) -> true | _ -> false) srcs
      then add_interfs_list g (vmreg temp_for_parent_frame) dsts;
      (* Take into account destroyed reg when initializing Outgoing
         arguments of type Tsingle *)
      if List.exists
           (function (L(S(Outgoing, _, Tsingle))) -> true | _ -> false) dsts
      then
        List.iter
          (fun mr ->
            add_interfs_list g (vmreg mr) srcs;
            IRC.add_interf g (vmreg mr) ftmp)
          (destroyed_by_setstack Tsingle)
  | Xop(Ofloatofsingle, arg1::_, res) when Configuration.arch = "powerpc" ->
      add_interfs_def g res live;
      IRC.add_pref g arg1 res
  | Xop(op, args, res) ->
      begin match is_two_address op args with
      | None ->
          add_interfs_def g res live
      | Some(arg1, argl) ->
          (* Treat as "res := arg1; res := op(res, argl)" *)
          add_interfs_def g res live;
          IRC.add_pref g arg1 res;
          add_interfs_move g arg1 res
            (vset_addlist (res :: argl) (VSet.remove res live))
      end;
      add_interfs_destroyed g (VSet.remove res live) (destroyed_by_op op)
  | Xload(chunk, addr, args, dst) ->
      add_interfs_def g dst live;
      add_interfs_destroyed g (VSet.remove dst live) 
                              (destroyed_by_load chunk addr)
  | Xstore(chunk, addr, args, src) ->
      add_interfs_destroyed g live (destroyed_by_store chunk addr)
  | Xcall(sg, vos, args, res) ->
      add_interfs_destroyed g (vset_removelist res live) destroyed_at_call
  | Xtailcall(sg, Coq_inl v, args) ->
      List.iter (fun r -> IRC.add_interf g (vmreg r) v) int_callee_save_regs
  | Xtailcall(sg, Coq_inr id, args) ->
      ()
  | Xbuiltin(ef, args, res) ->
      (* Interferences with live across *)
      let across = vset_removelist res live in
      List.iter (add_interfs_live g across) res;
      (* All results must be pairwise different *)
      add_interfs_pairwise g res;
      add_interfs_destroyed g across (destroyed_by_builtin ef);
      begin match ef, args, res with
      | EF_annot_val _, [arg], [res] -> IRC.add_pref g arg res (* like a move *)
      | _ -> ()
      end
  | Xbranch s ->
      ()
  | Xcond(cond, args, s1, s2) ->
      add_interfs_destroyed g live (destroyed_by_cond cond)
  | Xjumptable(arg, tbl) ->
      add_interfs_destroyed g live destroyed_by_jumptable
  | Xreturn optarg ->
      ()

let rec add_interfs_block g blk live =
  match blk with
  | [] -> live
  | instr :: blk' ->
      let live' = add_interfs_block g blk' live in
      add_interfs_instr g instr live';
      live_before instr live'

let find_coloring f liveness =
  (*type_function f;  (* for debugging *)*)
  let g = IRC.init (spill_costs f) in
  PTree.fold
    (fun () pc blk -> ignore (add_interfs_block g blk (PMap.get pc liveness)))
    f.fn_code ();
  add_interfs_destroyed g
    (transfer_live f f.fn_entrypoint (PMap.get f.fn_entrypoint liveness))
    destroyed_at_function_entry;
  IRC.coloring g


(*********** Determination of variables that need spill code insertion *****)

let is_reg alloc v =
  match alloc v with R _ -> true | S _ -> false

let add_tospill alloc v ts =
  match alloc v with R _ -> ts | S _ -> VSet.add v ts

let addlist_tospill alloc vl ts =
  List.fold_right (add_tospill alloc) vl ts

let addros_tospill alloc ros ts =
  match ros with Coq_inl v -> add_tospill alloc v ts | Coq_inr s -> ts

let tospill_instr alloc instr ts =
  match instr with
  | Xmove(src, dst) ->
      if is_reg alloc src || is_reg alloc dst || alloc src = alloc dst
      then ts
      else VSet.add src (VSet.add dst ts)
  | Xreload(src, dst) ->
      if not (is_reg alloc dst) then begin
        printf "Error: %a was spilled\n" PrintXTL.var dst;
        assert false
      end;
      ts
  | Xspill(src, dst) ->
      if not (is_reg alloc src) then begin
        printf "Error: %a was spilled\n" PrintXTL.var src;
        assert false
      end;
      ts
  | Xparmove(srcs, dsts, itmp, ftmp) ->
      assert (is_reg alloc itmp && is_reg alloc ftmp);
      ts
  | Xop(op, args, res) ->
      addlist_tospill alloc args (add_tospill alloc res ts)
  | Xload(chunk, addr, args, dst) ->
      addlist_tospill alloc args (add_tospill alloc dst ts)
  | Xstore(chunk, addr, args, src) ->
      addlist_tospill alloc args (add_tospill alloc src ts)
  | Xcall(sg, vos, args, res) ->
      addros_tospill alloc vos ts
  | Xtailcall(sg, vos, args) ->
      addros_tospill alloc vos ts
  | Xbuiltin(ef, args, res) ->
      begin match ef with
      | EF_annot _ -> ts
      |      _     -> addlist_tospill alloc args (addlist_tospill alloc res ts)
      end
  | Xbranch s ->
      ts
  | Xcond(cond, args, s1, s2) ->
      addlist_tospill alloc args ts
  | Xjumptable(arg, tbl) ->
      add_tospill alloc arg ts
  | Xreturn optarg ->
      ts

let rec tospill_block alloc blk ts =
  match blk with
  | [] -> ts
  | instr :: blk' -> tospill_block alloc blk' (tospill_instr alloc instr ts)

let tospill_function f alloc =
  PTree.fold1
    (fun ts blk -> tospill_block alloc blk ts)
    f.fn_code VSet.empty


(********************* Spilling ***********************)

(* We follow a semi-naive spilling strategy.  By default, we spill at
  every definition of a variable that could not be allocated a register,
  and we reload before every use.  However, we also maintain a list of
  equations of the form [spilled-var = temp] that keep track of
  variables that were recently spilled or reloaded.  Based on these
  equations, we can avoid reloading a spilled variable if its value
  is still available in a temporary register. *)

let rec find_reg_containing v = function
  | [] -> None
  | (var, temp, date) :: eqs ->
      if var = v then Some temp else find_reg_containing v eqs

let add v t eqs = (v, t, 0) :: eqs

let kill x eqs =
  List.filter (fun (v, t, date) -> v <> x && t <> x) eqs
  
let reload_var tospill eqs v =
  if not (VSet.mem v tospill) then
    (v, [], eqs)
  else
    match find_reg_containing v eqs with
    | Some t ->
        (*printf "Reusing %a for %a@ @." PrintXTL.var t PrintXTL.var v;*)
        (t, [], eqs)
    | None ->
        let t = new_temp (typeof v) in (t, [Xreload(v, t)], add v t eqs)

let rec reload_vars tospill eqs vl =
  match vl with
  | [] -> ([], [], eqs)
  | v1 :: vs ->
      let (t1, c1, eqs1) = reload_var tospill eqs v1 in
      let (ts, cs, eqs2) = reload_vars tospill eqs1 vs in
      (t1 :: ts, c1 @ cs, eqs2)

let save_var tospill eqs v =
  if not (VSet.mem v tospill) then
    (v, [], kill v eqs)
  else begin
    let t = new_temp (typeof v) in
    (t, [Xspill(t, v)], add v t (kill v eqs))
  end

let rec save_vars tospill eqs vl =
  match vl with
  | [] -> ([], [], eqs)
  | v1 :: vs ->
      let (t1, c1, eqs1) = save_var tospill eqs v1 in
      let (ts, cs, eqs2) = save_vars tospill eqs1 vs in
      (t1 :: ts, c1 @ cs, eqs2)

(* Trimming equations when we have too many or when they are too old.
   The goal is to limit the live range of unspillable temporaries.
   By setting [max_age] to zero, we can effectively deactivate
   the reuse strategy and fall back to a naive "reload at every use"
   strategy. *)

let max_age = ref 3
let max_num_eqs = ref 3

let rec trim count eqs =
  if count <= 0 then [] else
    match eqs with
    | [] -> []
    | (v, t, date) :: eqs' -> 
        if date <= !max_age
        then (v, t, date + 1) :: trim (count - 1) eqs'
        else []

(* Insertion of spill and reload instructions. *)

let spill_instr tospill eqs instr =
  let eqs = trim !max_num_eqs eqs in
  match instr with
  | Xmove(src, dst) ->
      if VSet.mem src tospill && VSet.mem dst tospill then begin
        let (src', c1, eqs1) = reload_var tospill eqs src in
        (c1 @ [Xspill(src', dst)], add dst src' (kill dst eqs1))
      end else begin
        ([instr], kill dst eqs)
      end
  | Xreload(src, dst) ->
      assert false
  | Xspill(src, dst) ->
      assert false
  | Xparmove(srcs, dsts, itmp, ftmp) ->
      ([instr], List.fold_right kill dsts eqs)
  | Xop(op, args, res) ->
      begin match is_two_address op args with
      | None ->
          let (args', c1, eqs1) = reload_vars tospill eqs args in
          let (res', c2, eqs2) = save_var tospill eqs1 res in
          (c1 @ Xop(op, args', res') :: c2, eqs2)
      | Some(arg1, argl) ->
          begin match VSet.mem res tospill, VSet.mem arg1 tospill with
          | false, false ->
              let (argl', c1, eqs1) = reload_vars tospill eqs argl in
              (c1 @ [Xop(op, arg1 :: argl', res)], kill res eqs1)
          | true, false ->
              let tmp = new_temp (typeof res) in
              let (argl', c1, eqs1) = reload_vars tospill eqs argl in
              (c1 @ [Xmove(arg1, tmp); Xop(op, tmp :: argl', tmp); Xspill(tmp, res)], 
               add res tmp (kill res eqs1))
          | false, true ->
              let eqs1 = add arg1 res (kill res eqs) in
              let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in
              (Xreload(arg1, res) :: c1 @ [Xop(op, res :: argl', res)],
               kill res eqs2)
          | true, true ->
              let tmp = new_temp (typeof res) in              
              let eqs1 = add arg1 tmp eqs in
              let (argl', c1, eqs2) = reload_vars tospill eqs1 argl in
              (Xreload(arg1, tmp) :: c1 @ [Xop(op, tmp :: argl', tmp); Xspill(tmp, res)],
               add res tmp (kill tmp (kill res eqs2)))
          end
      end          
  | Xload(chunk, addr, args, dst) ->
      let (args', c1, eqs1) = reload_vars tospill eqs args in
      let (dst', c2, eqs2) = save_var tospill eqs1 dst in
      (c1 @ Xload(chunk, addr, args', dst') :: c2, eqs2)
  | Xstore(chunk, addr, args, src) ->
      let (args', c1, eqs1) = reload_vars tospill eqs args in
      let (src', c2, eqs2) = reload_var tospill eqs1 src in
      (c1 @ c2 @ [Xstore(chunk, addr, args', src')], eqs2)
  | Xcall(sg, Coq_inl v, args, res) ->
      let (v', c1, eqs1) = reload_var tospill eqs v in
      (c1 @ [Xcall(sg, Coq_inl v', args, res)], [])
  | Xcall(sg, Coq_inr id, args, res) ->
      ([instr], [])
  | Xtailcall(sg, Coq_inl v, args) ->
      let (v', c1, eqs1) = reload_var tospill eqs v in
      (c1 @ [Xtailcall(sg, Coq_inl v', args)], [])
  | Xtailcall(sg, Coq_inr id, args) ->
      ([instr], [])
  | Xbuiltin(ef, args, res) ->
      begin match ef with
      | EF_annot _ ->
          ([instr], eqs)
      | _ ->
          let (args', c1, eqs1) = reload_vars tospill eqs args in
          let (res', c2, eqs2) = save_vars tospill eqs1 res in
          (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2)
      end
  | Xbranch s ->
      ([instr], eqs)
  | Xcond(cond, args, s1, s2) ->
     let (args', c1, eqs1) = reload_vars tospill eqs args in
     (c1 @ [Xcond(cond, args', s1, s2)], eqs1)
  | Xjumptable(arg, tbl) ->
      let (arg', c1, eqs1) = reload_var tospill eqs arg in
      (c1 @ [Xjumptable(arg', tbl)], eqs1)
  | Xreturn optarg ->
      ([instr], [])

let rec spill_block tospill pc blk eqs =
  match blk with
  | [] -> ([], eqs)
  | instr :: blk' ->
      let (c1, eqs1) = spill_instr tospill eqs instr in
      let (c2, eqs2) = spill_block tospill pc blk' eqs1 in
      (c1 @ c2, eqs2)

(*
let spill_block tospill pc blk eqs =
  printf "@[<hov 2>spill_block: at %ld: " (camlint_of_positive pc);
  List.iter (fun (x,y,d) -> printf "@ %a=%a" PrintXTL.var x PrintXTL.var y) eqs;
  printf "@]@.";
  spill_block tospill pc blk eqs
*)

let spill_function f tospill round =
  max_num_eqs := 3;
  max_age := (if round <= 10 then 3 else if round <= 20 then 1 else 0);
  transform_basic_blocks (spill_block tospill) [] f


(***************** Generation of LTL from XTL ***********************)

(** Apply a register allocation to an XTL function, producing an LTL function.
  Raise [Bad_LTL] if some pseudoregisters were mapped to stack locations
  while machine registers were expected, or in other words if spilling
  and reloading code must be inserted. *)

exception Bad_LTL

let mreg_of alloc v = match alloc v with R mr -> mr | S _ -> raise Bad_LTL

let mregs_of alloc vl = List.map (mreg_of alloc) vl

let mros_of alloc vos = sum_left_map (mreg_of alloc) vos

let make_move src dst k =
  match src, dst with
  | R rsrc, R rdst ->
      if rsrc = rdst then k else LTL.Lop(Omove, [rsrc], rdst) :: k
  | R rsrc, S(sl, ofs, ty) ->
      LTL.Lsetstack(rsrc, sl, ofs, ty) :: k
  | S(sl, ofs, ty), R rdst ->
      LTL.Lgetstack(sl, ofs, ty, rdst) :: k
  | S _, S _ ->
      if src = dst then k else raise Bad_LTL

type parmove_status = To_move | Being_moved | Moved

let make_parmove srcs dsts itmp ftmp k =
  let src = Array.of_list srcs
  and dst = Array.of_list dsts in
  let n = Array.length src in
  assert (Array.length dst = n);
  let status = Array.make n To_move in
  let temp_for =
    function (Tint|Tany32) -> itmp
           | (Tfloat|Tsingle|Tany64) -> ftmp
           | Tlong -> assert false in
  let code = ref [] in
  let add_move s d =
    match s, d with
    | R rs, R rd ->
        code := LTL.Lop(Omove, [rs], rd) :: !code
    | R rs, S(sl, ofs, ty) ->
        code := LTL.Lsetstack(rs, sl, ofs, ty) :: !code
    | S(sl, ofs, ty), R rd ->
        code := LTL.Lgetstack(sl, ofs, ty, rd) :: !code
    | S(sls, ofss, tys), S(sld, ofsd, tyd) ->
        let tmp = temp_for tys in
        (* code will be reversed at the end *)
        code := LTL.Lsetstack(tmp, sld, ofsd, tyd) ::
                LTL.Lgetstack(sls, ofss, tys, tmp) :: !code
    in
  let rec move_one i =
    if src.(i) <> dst.(i) then begin
      status.(i) <- Being_moved;
      for j = 0 to n - 1 do
        if src.(j) = dst.(i) then
          match status.(j) with
          | To_move ->
              move_one j
          | Being_moved ->
              let tmp = R (temp_for (Loc.coq_type src.(j))) in
              add_move src.(j) tmp;
              src.(j) <- tmp
          | Moved ->
              ()
      done;
      add_move src.(i) dst.(i);
      status.(i) <- Moved
    end in
  for i = 0 to n - 1 do
    if status.(i) = To_move then move_one i
  done;
  List.rev_append !code k

let transl_instr alloc instr k =
  match instr with
  | Xmove(src, dst) | Xreload(src, dst) | Xspill(src, dst) ->
      make_move (alloc src) (alloc dst) k
  | Xparmove(srcs, dsts, itmp, ftmp) ->
      make_parmove (List.map alloc srcs) (List.map alloc dsts)
                   (mreg_of alloc itmp) (mreg_of alloc ftmp) k
  | Xop(op, args, res) ->
      let rargs = mregs_of alloc args
      and rres  = mreg_of alloc res in
      begin match is_two_address op rargs with
      | None ->
          LTL.Lop(op, rargs, rres) :: k
      | Some(rarg1, rargl) ->
          if rarg1 = rres then
            LTL.Lop(op, rargs, rres) :: k
          else
            LTL.Lop(Omove, [rarg1], rres) :: 
            LTL.Lop(op, rres :: rargl, rres) :: k
      end
  | Xload(chunk, addr, args, dst) ->
      LTL.Lload(chunk, addr, mregs_of alloc args, mreg_of alloc dst) :: k
  | Xstore(chunk, addr, args, src) ->
      LTL.Lstore(chunk, addr, mregs_of alloc args, mreg_of alloc src) :: k
  | Xcall(sg, vos, args, res) ->
      LTL.Lcall(sg, mros_of alloc vos) :: k
  | Xtailcall(sg, vos, args) ->
      LTL.Ltailcall(sg, mros_of alloc vos) :: []
  | Xbuiltin(ef, args, res) ->
      begin match ef with
      | EF_annot _ ->
          LTL.Lannot(ef, List.map alloc args) :: k
      | _ ->
          LTL.Lbuiltin(ef, mregs_of alloc args, mregs_of alloc res) :: k
      end
  | Xbranch s ->
      LTL.Lbranch s :: []
  | Xcond(cond, args, s1, s2) ->
      LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: []
  | Xjumptable(arg, tbl) ->
      LTL.Ljumptable(mreg_of alloc arg, tbl) :: []
  | Xreturn optarg ->
      LTL.Lreturn :: []

let rec transl_block alloc blk =
  match blk with
  | [] -> []
  | instr :: blk' -> transl_instr alloc instr (transl_block alloc blk')

let transl_function fn alloc =
  { LTL.fn_sig = fn.fn_sig;
    LTL.fn_stacksize = fn.fn_stacksize;
    LTL.fn_entrypoint = fn.fn_entrypoint;
    LTL.fn_code = PTree.map1 (transl_block alloc) fn.fn_code 
  }


(******************* All together *********************)

exception Timeout

let rec first_round f liveness =
  let alloc = find_coloring f liveness in
  if !option_dalloctrace then begin
    fprintf !pp "-------------- After initial register allocation\n\n";
    PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f
  end;
  let ts = tospill_function f alloc in
  if VSet.is_empty ts then success f alloc else more_rounds f ts 1

and more_rounds f ts count =
  if count >= 40 then raise Timeout;
  let f' = spill_function f ts count in
  let liveness = liveness_analysis f' in
  let alloc = find_coloring f' liveness in
  if !option_dalloctrace then begin
    fprintf !pp "-------------- After register allocation (round %d)\n\n" count;
    PrintXTL.print_function !pp ~alloc: alloc ~live: liveness f'
  end;
  let ts' = tospill_function f' alloc in
  if VSet.is_empty ts'
  then success f' alloc
  else begin
    if !option_dalloctrace then begin
      fprintf !pp "--- Remain to be spilled:\n";
      VSet.iter (fun v -> fprintf !pp "%a " PrintXTL.var v) ts';
      fprintf !pp "\n\n"
    end;      
    more_rounds f (VSet.union ts ts') (count + 1)
  end

and success f alloc =
  let f' = transl_function f alloc in
  if !option_dalloctrace then begin
    fprintf !pp "-------------- Candidate allocation\n\n";
    PrintLTL.print_function !pp P.one f'
  end;
  f'

open Errors

let regalloc f =
  init_trace();
  reset_temps();
  let f1 = Splitting.rename_function f in
  match RTLtyping.type_function f1 with
  | Error msg ->
      Error(MSG (coqstring_of_camlstring "RTL code after splitting is ill-typed:") :: msg)
  | OK tyenv ->
      let f2 = function_of_RTL_function f1 tyenv in
      let liveness = liveness_analysis f2 in
      let f3 = dead_code_elimination f2 liveness in
      if !option_dalloctrace then begin
        fprintf !pp "-------------- Initial XTL\n\n";
        PrintXTL.print_function !pp f3
      end;
      try
        OK(first_round f3 liveness)
      with
      | Timeout ->
          Error(msg (coqstring_of_camlstring "Spilling fails to converge"))
      | Type_error_at pc ->
          Error [MSG(coqstring_of_camlstring "Ill-typed XTL code at PC "); 
                 POS pc]
      | Bad_LTL ->
          Error(msg (coqstring_of_camlstring "Bad LTL after spilling"))