Diffstat (limited to 'powerpc/Asmexpand.ml')
-rw-r--r--  powerpc/Asmexpand.ml  |  525
 1 file changed, 525 insertions(+), 0 deletions(-)
diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml
new file mode 100644
index 0000000..243a4d9
--- /dev/null
+++ b/powerpc/Asmexpand.ml
@@ -0,0 +1,525 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(* Expanding built-ins and some pseudo-instructions by rewriting
+   the PPC assembly code. *)
+
+open Datatypes
+open Camlcoq
+open Integers
+open AST
+open Memdata
+open Asm
+
+(* Buffering the expanded code *)
+
+let current_code = ref ([]: instruction list)
+
+let emit i = current_code := i :: !current_code
+
+let emit_loadimm r n =
+ List.iter emit (Asmgen.loadimm r n [])
+
+let emit_addimm rd rs n =
+ List.iter emit (Asmgen.addimm rd rs n [])
+
+let get_code () =
+ let c = List.rev !current_code in current_code := []; c
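+
+(* [emit] conses onto the head of the buffer, so instructions accumulate
+   in reverse; [get_code] restores program order with a single List.rev
+   and resets the buffer, keeping emission O(1) per instruction. *)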
+
+(* Generation of fresh labels *)
+
+let dummy_function = { fn_code = []; fn_sig = signature_main }
+let current_function = ref dummy_function
+let next_label = ref (None : label option)
+
+let new_label () =
+ let lbl =
+ match !next_label with
+ | Some l -> l
+ | None ->
+ (* on-demand computation of the next available label *)
+ List.fold_left
+ (fun next instr ->
+ match instr with
+ | Plabel l -> if P.lt l next then next else P.succ l
+ | _ -> next)
+ P.one (!current_function).fn_code
+ in
+ next_label := Some (P.succ lbl);
+ lbl
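+
+(* For example, if the body already contains Plabel 3 and Plabel 7, the
+   fold yields 8 on the first call; subsequent calls take the cached
+   fast path and return 9, 10, ... *)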
+
+let set_current_function f =
+ current_function := f; next_label := None
+
+(* Useful constants *)
+
+let _0 = Integers.Int.zero
+let _1 = Integers.Int.one
+let _2 = coqint_of_camlint 2l
+let _4 = coqint_of_camlint 4l
+let _6 = coqint_of_camlint 6l
+let _8 = coqint_of_camlint 8l
+let _m4 = coqint_of_camlint (-4l)
+let _m8 = coqint_of_camlint (-8l)
+
+(* Handling of annotations *)
+
+let expand_annot_val txt targ args res =
+ emit (Pannot(EF_annot(txt, [AA_arg targ]), List.map (fun r -> APreg r) args));
+ begin match args, res with
+ | [IR src], [IR dst] ->
+ if dst <> src then emit (Pmr(dst, src))
+ | [FR src], [FR dst] ->
+ if dst <> src then emit (Pfmr(dst, src))
+ | _, _ ->
+ assert false
+ end
+
+(* Handling of memcpy *)
+
+(* On the PowerPC, unaligned accesses to 16- and 32-bit integers are
+ fast, but unaligned accesses to 64-bit floats can be slow
+ (not so much on G5, but clearly so on Power7).
+ So, use 64-bit accesses only if alignment >= 4.
+ Note that lfd and stfd cannot trap on ill-formed floats. *)
+
+let expand_builtin_memcpy_small sz al src dst =
+ let rec copy ofs sz =
+ if sz >= 8 && al >= 4 && !Clflags.option_ffpu then begin
+ emit (Plfd(FPR13, Cint ofs, src));
+ emit (Pstfd(FPR13, Cint ofs, dst));
+ copy (Int.add ofs _8) (sz - 8)
+ end else if sz >= 4 then begin
+ emit (Plwz(GPR0, Cint ofs, src));
+ emit (Pstw(GPR0, Cint ofs, dst));
+ copy (Int.add ofs _4) (sz - 4)
+ end else if sz >= 2 then begin
+ emit (Plhz(GPR0, Cint ofs, src));
+ emit (Psth(GPR0, Cint ofs, dst));
+ copy (Int.add ofs _2) (sz - 2)
+ end else if sz >= 1 then begin
+ emit (Plbz(GPR0, Cint ofs, src));
+ emit (Pstb(GPR0, Cint ofs, dst));
+ copy (Int.add ofs _1) (sz - 1)
+ end in
+ copy _0 sz
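+
+(* For instance, [expand_builtin_memcpy_small 16 8 src dst] with the FPU
+   available emits two lfd/stfd pairs through FPR13; with al = 2 it
+   falls back to four lwz/stw pairs through GPR0. *)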
+
+let expand_builtin_memcpy_big sz al src dst =
+ assert (sz >= 4);
+ emit_loadimm GPR0 (Z.of_uint (sz / 4));
+ emit (Pmtctr GPR0);
+ let (s,d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in
+ emit (Paddi(s, src, Cint _m4));
+ emit (Paddi(d, dst, Cint _m4));
+ let lbl = new_label() in
+ emit (Plabel lbl);
+ emit (Plwzu(GPR0, Cint _4, s));
+ emit (Pstwu(GPR0, Cint _4, d));
+ emit (Pbdnz lbl);
+ (* s and d lag behind by 4 bytes *)
+ match sz land 3 with
+ | 1 -> emit (Plbz(GPR0, Cint _4, s));
+ emit (Pstb(GPR0, Cint _4, d))
+ | 2 -> emit (Plhz(GPR0, Cint _4, s));
+ emit (Psth(GPR0, Cint _4, d))
+ | 3 -> emit (Plhz(GPR0, Cint _4, s));
+ emit (Psth(GPR0, Cint _4, d));
+ emit (Plbz(GPR0, Cint _6, s));
+ emit (Pstb(GPR0, Cint _6, d))
+ | _ -> ()
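+
+(* Example: sz = 11 gives CTR = 2 (8 bytes copied by the loop), then the
+   residue case 3 copies a halfword at offset 4 and a byte at offset 6,
+   both relative to the lagging s/d pointers. *)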
+
+let expand_builtin_memcpy sz al args =
+ let (dst, src) =
+ match args with [IR d; IR s] -> (d, s) | _ -> assert false in
+ if sz <= (if !Clflags.option_ffpu && al >= 4
+ then if !Clflags.option_Osize then 35 else 51
+ else if !Clflags.option_Osize then 19 else 27)
+ then expand_builtin_memcpy_small sz al src dst
+ else expand_builtin_memcpy_big sz al src dst
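+
+(* The cutoffs (19/27 bytes without the FPU, 35/51 with it) appear
+   chosen to bound the straight-line expansion: -Osize picks the smaller
+   threshold, and the FPU variants allow more bytes since each lfd/stfd
+   pair moves 8 bytes. Above the cutoff, the counted loop wins. *)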
+
+(* Handling of volatile reads and writes *)
+
+let expand_builtin_vload_common chunk base offset res =
+ match chunk, res with
+ | Mint8unsigned, IR res ->
+ emit (Plbz(res, offset, base))
+ | Mint8signed, IR res ->
+ emit (Plbz(res, offset, base));
+ emit (Pextsb(res, res))
+ | Mint16unsigned, IR res ->
+ emit (Plhz(res, offset, base))
+ | Mint16signed, IR res ->
+ emit (Plha(res, offset, base))
+ | (Mint32 | Many32), IR res ->
+ emit (Plwz(res, offset, base))
+ | Mfloat32, FR res ->
+ emit (Plfs(res, offset, base))
+ | (Mfloat64 | Many64), FR res ->
+ emit (Plfd(res, offset, base))
+ (* Mint64 is special-cased below *)
+ | _ ->
+ assert false
+
+let expand_builtin_vload chunk args res =
+ begin match args, res with
+ | [IR addr], [res] when chunk <> Mint64 ->
+ expand_builtin_vload_common chunk addr (Cint _0) res
+ | [IR addr], [IR res1; IR res2] when chunk = Mint64 ->
+ if addr <> res1 then begin
+ emit (Plwz(res1, Cint _0, addr));
+ emit (Plwz(res2, Cint _4, addr))
+ end else begin
+ emit (Plwz(res2, Cint _4, addr));
+ emit (Plwz(res1, Cint _0, addr))
+ end
+ | _ ->
+ assert false
+ end
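+
+(* The two word loads are ordered so that [addr] is read before being
+   overwritten: if addr = res1 the high word is loaded last; otherwise
+   loading res1 first is safe, res1 and res2 being distinct. *)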
+
+let expand_builtin_vload_global chunk id ofs args res =
+ begin match res with
+ | [res] when chunk <> Mint64 ->
+ emit (Paddis(GPR11, GPR0, Csymbol_high(id, ofs)));
+ expand_builtin_vload_common chunk GPR11 (Csymbol_low(id, ofs)) res
+ | [IR res1; IR res2] when chunk = Mint64 ->
+ emit (Paddis(res1, GPR0, Csymbol_high(id, ofs)));
+ emit (Plwz(res1, Csymbol_low(id, ofs), res1));
+ let ofs = Int.add ofs _4 in
+ emit (Paddis(res2, GPR0, Csymbol_high(id, ofs)));
+ emit (Plwz(res2, Csymbol_low(id, ofs), res2))
+ | _ ->
+ assert false
+ end
+
+let expand_builtin_vload_sda chunk id ofs args res =
+ begin match res with
+ | [res] when chunk <> Mint64 ->
+ expand_builtin_vload_common chunk GPR0 (Csymbol_sda(id, ofs)) res
+ | [IR res1; IR res2] when chunk = Mint64 ->
+ emit (Plwz(res1, Csymbol_sda(id, ofs), GPR0));
+ let ofs = Int.add ofs _4 in
+ emit (Plwz(res2, Csymbol_sda(id, ofs), GPR0))
+ | _ ->
+ assert false
+ end
+
+let expand_builtin_vstore_common chunk base offset src =
+ match chunk, src with
+ | (Mint8signed | Mint8unsigned), IR src ->
+ emit (Pstb(src, offset, base))
+ | (Mint16signed | Mint16unsigned), IR src ->
+ emit (Psth(src, offset, base))
+ | (Mint32 | Many32), IR src ->
+ emit (Pstw(src, offset, base))
+ | Mfloat32, FR src ->
+ emit (Pstfs(src, offset, base))
+ | (Mfloat64 | Many64), FR src ->
+ emit (Pstfd(src, offset, base))
+ (* Mint64 is special-cased below *)
+ | _ ->
+ assert false
+
+let expand_builtin_vstore chunk args =
+ begin match args with
+ | [IR addr; src] when chunk <> Mint64 ->
+ expand_builtin_vstore_common chunk addr (Cint _0) src
+ | [IR addr; IR src1; IR src2] when chunk = Mint64 ->
+ emit (Pstw(src1, Cint _0, addr));
+ emit (Pstw(src2, Cint _4, addr))
+ | _ ->
+ assert false
+ end
+
+let expand_builtin_vstore_global chunk id ofs args =
+ begin match args with
+ | [src] when chunk <> Mint64 ->
+ let tmp = if src = IR GPR11 then GPR12 else GPR11 in
+ emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
+ expand_builtin_vstore_common chunk tmp (Csymbol_low(id, ofs)) src
+ | [IR src1; IR src2] when chunk = Mint64 ->
+ let tmp =
+ if not (List.mem GPR12 [src1; src2]) then GPR12 else
+ if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in
+ emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
+ emit (Pstw(src1, Csymbol_low(id, ofs), tmp));
+ let ofs = Int.add ofs _4 in
+ emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
+ emit (Pstw(src2, Csymbol_low(id, ofs), tmp))
+ | _ ->
+ assert false
+ end
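+
+(* Only the two source halves can conflict with the temporary, so trying
+   GPR12, then GPR11, then GPR10 always finds a free register. *)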
+
+let expand_builtin_vstore_sda chunk id ofs args =
+ begin match args with
+ | [src] when chunk <> Mint64 ->
+ expand_builtin_vstore_common chunk GPR0 (Csymbol_sda(id, ofs)) src
+ | [IR src1; IR src2] when chunk = Mint64 ->
+ emit (Pstw(src1, Csymbol_sda(id, ofs), GPR0));
+ emit (Pstw(src2, Csymbol_sda(id, ofs), GPR0))
+ | _ ->
+ assert false
+ end
+
+(* Handling of varargs *)
+
+let current_function_stacksize = ref 0l
+
+let align n a = (n + a - 1) land (-a)
+
+let rec next_arg_locations ir fr ofs = function
+ | [] ->
+ (ir, fr, ofs)
+ | (Tint | Tany32) :: l ->
+ if ir < 8
+ then next_arg_locations (ir + 1) fr ofs l
+ else next_arg_locations ir fr (ofs + 4) l
+ | (Tfloat | Tsingle | Tany64) :: l ->
+ if fr < 8
+ then next_arg_locations ir (fr + 1) ofs l
+ else next_arg_locations ir fr (align ofs 8 + 8) l
+ | Tlong :: l ->
+ if ir < 7
+ then next_arg_locations (align ir 2 + 2) fr ofs l
+ else next_arg_locations ir fr (align ofs 8 + 8) l
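+
+(* Mirrors the parameter-passing conventions used at call sites: up to 8
+   integer and 8 float registers, with longs consuming an even-aligned
+   pair of integer registers; [ofs] counts bytes spilled to the stack. *)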
+
+let expand_builtin_va_start r =
+ if not (!current_function).fn_sig.sig_cc.cc_vararg then
+ invalid_arg "Fatal error: va_start used in non-vararg function";
+ let (ir, fr, ofs) =
+ next_arg_locations 0 0 0 (!current_function).fn_sig.sig_args in
+ emit_loadimm GPR0 (Z.of_uint ir);
+ emit (Pstb(GPR0, Cint _0, r));
+ emit_loadimm GPR0 (Z.of_uint fr);
+ emit (Pstb(GPR0, Cint _1, r));
+ emit_addimm GPR0 GPR1 (coqint_of_camlint
+ Int32.(add (add !current_function_stacksize 8l)
+ (of_int ofs)));
+ emit (Pstw(GPR0, Cint _4, r));
+ emit_addimm GPR0 GPR1 (coqint_of_camlint
+ Int32.(sub !current_function_stacksize 96l));
+ emit (Pstw(GPR0, Cint _8, r))
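+
+(* The four stores materialize the va_list descriptor: byte 0 holds the
+   next integer register index, byte 1 the next float register index,
+   the word at offset 4 points past the fixed stack arguments, and the
+   word at offset 8 points to the 96-byte register save area. *)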
+
+(* Auxiliary for 64-bit integer arithmetic built-ins. They expand to
+ two instructions, one computing the low 32 bits of the result,
+ followed by another computing the high 32 bits. In cases where
+ the first instruction would overwrite arguments to the second
+ instruction, we must go through GPR0 to hold the low 32 bits of the result.
+*)
+
+let expand_int64_arith conflict rl fn =
+ if conflict then (fn GPR0; emit (Pmr(rl, GPR0))) else fn rl
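+
+(* e.g. for __builtin_negl with rl = ah, the low word is computed into
+   GPR0 first so that ah is still intact when Psubfze reads it, and only
+   then moved into rl. *)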
+
+(* Handling of compiler-inlined builtins *)
+
+let expand_builtin_inline name args res =
+ (* Can use as temporaries: GPR0, FPR13 *)
+ match name, args, res with
+ (* Integer arithmetic *)
+ | "__builtin_mulhw", [IR a1; IR a2], [IR res] ->
+ emit (Pmulhw(res, a1, a2))
+ | "__builtin_mulhwu", [IR a1; IR a2], [IR res] ->
+ emit (Pmulhwu(res, a1, a2))
+ | "__builtin_cntlz", [IR a1], [IR res] ->
+ emit (Pcntlz(res, a1))
+ | ("__builtin_bswap" | "__builtin_bswap32"), [IR a1], [IR res] ->
+ emit (Pstwu(a1, Cint _m8, GPR1));
+ emit (Pcfi_adjust _8);
+ emit (Plwbrx(res, GPR0, GPR1));
+ emit (Paddi(GPR1, GPR1, Cint _8));
+ emit (Pcfi_adjust _m8)
+ | "__builtin_bswap16", [IR a1], [IR res] ->
+ emit (Prlwinm(GPR0, a1, _8, coqint_of_camlint 0x0000FF00l));
+ emit (Prlwinm(res, a1, coqint_of_camlint 24l,
+ coqint_of_camlint 0x000000FFl));
+ emit (Por(res, GPR0, res))
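+  (* Classic PowerPC has no register-to-register byte swap, so bswap32
+     round-trips through the stack and the byte-reversed load lwbrx;
+     bswap16 instead combines two rlwinm rotate-and-mask extractions. *)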
+ (* Float arithmetic *)
+ | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] ->
+ emit (Pfmadd(res, a1, a2, a3))
+ | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] ->
+ emit (Pfmsub(res, a1, a2, a3))
+ | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] ->
+ emit (Pfnmadd(res, a1, a2, a3))
+ | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] ->
+ emit (Pfnmsub(res, a1, a2, a3))
+ | "__builtin_fabs", [FR a1], [FR res] ->
+ emit (Pfabs(res, a1))
+ | "__builtin_fsqrt", [FR a1], [FR res] ->
+ emit (Pfsqrt(res, a1))
+ | "__builtin_frsqrte", [FR a1], [FR res] ->
+ emit (Pfrsqrte(res, a1))
+ | "__builtin_fres", [FR a1], [FR res] ->
+ emit (Pfres(res, a1))
+ | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] ->
+ emit (Pfsel(res, a1, a2, a3))
+ | "__builtin_fcti", [FR a1], [IR res] ->
+ emit (Pfctiw(FPR13, a1));
+ emit (Pstfdu(FPR13, Cint _m8, GPR1));
+ emit (Pcfi_adjust _8);
+ emit (Plwz(res, Cint _4, GPR1));
+ emit (Paddi(GPR1, GPR1, Cint _8));
+ emit (Pcfi_adjust _m8)
+ (* 64-bit integer arithmetic *)
+ | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
+ expand_int64_arith (rl = ah) rl (fun rl' ->
+ emit (Psubfic(rl', al, Cint _0));
+ emit (Psubfze(rh, ah)))
+ | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+ expand_int64_arith (rl = ah || rl = bh) rl (fun rl' ->
+ emit (Paddc(rl', al, bl));
+ emit (Padde(rh, ah, bh)))
+ | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+ expand_int64_arith (rl = ah || rl = bh) rl (fun rl' ->
+ emit (Psubfc(rl', bl, al));
+ emit (Psubfe(rh, bh, ah)))
+ | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
+ expand_int64_arith (rl = a || rl = b) rl (fun rl' ->
+ emit (Pmullw(rl', a, b));
+ emit (Pmulhwu(rh, a, b)))
+ (* Memory accesses *)
+ | "__builtin_read16_reversed", [IR a1], [IR res] ->
+ emit (Plhbrx(res, GPR0, a1))
+ | "__builtin_read32_reversed", [IR a1], [IR res] ->
+ emit (Plwbrx(res, GPR0, a1))
+ | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
+ emit (Psthbrx(a2, GPR0, a1))
+ | "__builtin_write32_reversed", [IR a1; IR a2], _ ->
+ emit (Pstwbrx(a2, GPR0, a1))
+ (* Synchronization *)
+ | "__builtin_eieio", [], _ ->
+ emit (Peieio)
+ | "__builtin_sync", [], _ ->
+ emit (Psync)
+ | "__builtin_isync", [], _ ->
+ emit (Pisync)
+ | "__builtin_trap", [], _ ->
+ emit (Ptrap)
+ (* Vararg stuff *)
+ | "__builtin_va_start", [IR a], _ ->
+ expand_builtin_va_start a
+ (* Catch-all *)
+ | _ ->
+ invalid_arg ("unrecognized builtin " ^ name)
+
+(* Calls to variadic functions: condition bit 6 must be set
+ if at least one argument is a float; clear otherwise.
+ Note that variadic functions cannot have arguments of type Tsingle. *)
+
+let set_cr6 sg =
+ if sg.sig_cc.cc_vararg then begin
+ if List.mem Tfloat sg.sig_args
+ then emit (Pcreqv(CRbit_6, CRbit_6, CRbit_6))
+ else emit (Pcrxor(CRbit_6, CRbit_6, CRbit_6))
+ end
+
+(* Expand instructions *)
+
+let num_crbit = function
+ | CRbit_0 -> 0
+ | CRbit_1 -> 1
+ | CRbit_2 -> 2
+ | CRbit_3 -> 3
+ | CRbit_6 -> 6
+
+let expand_instruction instr =
+ match instr with
+ | Pallocframe(sz, ofs) ->
+ let variadic = (!current_function).fn_sig.sig_cc.cc_vararg in
+ let sz = camlint_of_coqint sz in
+ assert (ofs = Int.zero);
+ let sz = if variadic then Int32.add sz 96l else sz in
+ let adj = Int32.neg sz in
+ if adj >= -0x8000l then
+ emit (Pstwu(GPR1, Cint(coqint_of_camlint adj), GPR1))
+ else begin
+ emit_loadimm GPR0 (coqint_of_camlint adj);
+ emit (Pstwxu(GPR1, GPR1, GPR0))
+ end;
+ emit (Pcfi_adjust (coqint_of_camlint sz));
+ if variadic then begin
+ emit (Pmflr GPR0);
+ emit (Pbl(intern_string "__compcert_va_saveregs",
+ {sig_args = []; sig_res = None; sig_cc = cc_default}));
+ emit (Pmtlr GPR0)
+ end;
+ current_function_stacksize := sz
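+      (* Pstwu allocates the frame and stores the back link in one
+         instruction; offsets that do not fit in 16 bits go through GPR0
+         and Pstwxu. The extra 96 bytes for variadic functions cover the
+         register save area (8 GPRs * 4 + 8 FPRs * 8 bytes) filled by
+         __compcert_va_saveregs. *)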
+ | Pbctr sg | Pbctrl sg | Pbl(_, sg) | Pbs(_, sg) ->
+ set_cr6 sg;
+ emit instr
+ | Pfreeframe(sz, ofs) ->
+ let variadic = (!current_function).fn_sig.sig_cc.cc_vararg in
+ let sz = camlint_of_coqint sz in
+ let sz = if variadic then Int32.add sz 96l else sz in
+ if sz < 0x8000l then
+ emit (Paddi(GPR1, GPR1, Cint(coqint_of_camlint sz)))
+ else
+ emit (Plwz(GPR1, Cint ofs, GPR1))
+ | Pfcti(r1, r2) ->
+ emit (Pfctiwz(FPR13, r2));
+ emit (Pstfdu(FPR13, Cint _m8, GPR1));
+ emit (Pcfi_adjust _8);
+ emit (Plwz(r1, Cint _4, GPR1));
+ emit (Paddi(GPR1, GPR1, Cint _8));
+ emit (Pcfi_adjust _m8)
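+      (* Pfctiwz leaves the converted integer in the low word of FPR13;
+         lacking a direct FPR-to-GPR move on 32-bit PowerPC, the result
+         makes a round trip through a fresh stack slot. *)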
+ | Pfmake(rd, r1, r2) ->
+ emit (Pstwu(r1, Cint _m8, GPR1));
+ emit (Pcfi_adjust _8);
+ emit (Pstw(r2, Cint _4, GPR1));
+ emit (Plfd(rd, Cint _0, GPR1));
+ emit (Paddi(GPR1, GPR1, Cint _8));
+ emit (Pcfi_adjust _m8)
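+      (* The two halves are stored contiguously (high word at offset 0,
+         low word at offset 4, big-endian) and reloaded as one double. *)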
+ | Pfxdp(r1, r2) ->
+ if r1 <> r2 then emit(Pfmr(r1, r2))
+ | Pmfcrbit(r1, bit) ->
+ emit (Pmfcr r1);
+ emit (Prlwinm(r1, r1, Z.of_uint (1 + num_crbit bit), _1))
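+      (* Pmfcr copies the whole condition register; rotating left by
+         1 + bit index moves the selected bit into the LSB, and the
+         mask _1 isolates it. *)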
+ | Pbuiltin(ef, args, res) ->
+ begin match ef with
+ | EF_builtin(name, sg) ->
+ expand_builtin_inline (extern_atom name) args res
+ | EF_vload chunk ->
+ expand_builtin_vload chunk args res
+ | EF_vstore chunk ->
+ expand_builtin_vstore chunk args
+ | EF_vload_global(chunk, id, ofs) ->
+ if symbol_is_small_data id ofs
+ then expand_builtin_vload_sda chunk id ofs args res
+ else expand_builtin_vload_global chunk id ofs args res
+ | EF_vstore_global(chunk, id, ofs) ->
+ if symbol_is_small_data id ofs
+ then expand_builtin_vstore_sda chunk id ofs args
+ else expand_builtin_vstore_global chunk id ofs args
+ | EF_memcpy(sz, al) ->
+ expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
+ | EF_annot_val(txt, targ) ->
+ expand_annot_val txt targ args res
+ | EF_inline_asm txt ->
+ emit instr
+ | _ ->
+ assert false
+ end
+ | _ ->
+ emit instr
+
+let expand_function fn =
+ set_current_function fn;
+ current_code := [];
+ List.iter expand_instruction fn.fn_code;
+ let c = get_code() in
+ set_current_function dummy_function;
+ { fn with fn_code = c }
+
+let expand_fundef = function
+ | Internal f -> Internal (expand_function f)
+ | External ef -> External ef
+
+let expand_program (p: Asm.program) : Asm.program =
+ AST.transform_program expand_fundef p