(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, INRIA Paris-Rocquencourt *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)

(* Printing PPC assembly code in asm syntax *)

open Printf
open Datatypes
open Maps
open Camlcoq
open Sections
open AST
open Memdata
open Asm

(* Recognition of target ABI and asm syntax *)

type target = MacOS | Linux | Diab

(* The target is chosen once, when this module is initialised, from
   [Configuration.system].  Any value other than the three recognised
   ones raises [Invalid_argument]. *)
let target =
  match Configuration.system with
  | "macosx" -> MacOS
  | "linux" -> Linux
  | "diab" -> Diab
  | _ -> invalid_arg ("System " ^ Configuration.system ^ " not supported")

(* On-the-fly label renaming *)

(* Next fresh numeric label; numbering starts at 100. *)
let next_label = ref 100

(* Return the current value of [next_label] and advance the counter. *)
let new_label() =
  let lbl = !next_label in incr next_label; lbl

(* Maps the labels of the function being printed to the fresh numeric
   labels allocated for them by [transl_label]. *)
let current_function_labels = (Hashtbl.create 39 : (label, int) Hashtbl.t)

(* Translate a label to its numeric name.  The first lookup of a label
   allocates a fresh number and records it; later lookups return the
   same number. *)
let transl_label lbl =
  try
    Hashtbl.find current_function_labels lbl
  with Not_found ->
    let lbl' = new_label() in
    Hashtbl.add current_function_labels lbl lbl';
    lbl'

(* Record identifiers of functions that need a special stub *)

module IdentSet = Set.Make(struct type t = ident let compare = compare end)

(* Identifiers printed by [symbol] as the name of their stub rather than
   under their own name. *)
let stubbed_functions = ref IdentSet.empty

(* Basic printing functions *)

(* Print a Coq integer in decimal, after conversion by
   [camlint_of_coqint]. *)
let coqint oc n =
  fprintf oc "%ld" (camlint_of_coqint n)

(* Print a symbol given as a string; MacOS prefixes it with an
   underscore. *)
let raw_symbol oc s =
  match target with
  | MacOS -> fprintf oc "_%s" s
  | Linux|Diab -> fprintf oc "%s" s

(* Print the symbol for identifier [symb].  Identifiers recorded in
   [stubbed_functions] are printed as their stub label instead. *)
let symbol oc symb =
  match target with
  | MacOS ->
      if IdentSet.mem symb !stubbed_functions
      then fprintf oc "L%s$stub" (extern_atom symb)
      else fprintf oc "_%s" (extern_atom symb)
  | Linux | Diab ->
      if IdentSet.mem symb !stubbed_functions
      then fprintf oc ".L%s$stub" (extern_atom symb)
      else fprintf oc "%s" (extern_atom symb)

(* Print a symbol, followed by [ + ofs] only when the offset is
   nonzero. *)
let symbol_offset oc (symb, ofs) =
  symbol oc symb;
  if ofs <> 0l then fprintf oc " + %ld" ofs

(* Print a numeric local label in the syntax of the target. *)
let label oc lbl =
  match target with
  | MacOS -> fprintf oc "L%d" lbl
  | Linux|Diab -> fprintf oc ".L%d" lbl

(* Print the low-half relocation expression for a local label. *)
let label_low oc lbl =
  match target with
  | MacOS -> fprintf oc "lo16(L%d)" lbl
  | Linux|Diab -> fprintf oc ".L%d@l" lbl

(* Print the high-half (adjusted) relocation expression for a local
   label. *)
let label_high oc lbl =
  match target with
  | MacOS -> fprintf oc "ha16(L%d)" lbl
  | Linux|Diab -> fprintf oc ".L%d@ha" lbl

(* String that starts a comment in the assembly syntax of the target. *)
let comment =
  match target with
  | MacOS -> ";"
  | Linux -> "#"
  | Diab -> ";"

(* Print an instruction constant: an integer, or the low half, high half
   or small-data-area form of a symbol plus offset.  The small-data form
   is not expected on MacOS and trips an assertion there. *)
let constant oc cst =
  match cst with
  | Cint n ->
      fprintf oc "%ld" (camlint_of_coqint n)
  | Csymbol_low(s, n) ->
      begin match target with
      | MacOS ->
          fprintf oc "lo16(%a)" symbol_offset (s, camlint_of_coqint n)
      | Linux|Diab ->
          fprintf oc "(%a)@l" symbol_offset (s, camlint_of_coqint n)
      end
  | Csymbol_high(s, n) ->
      begin match target with
      | MacOS ->
          fprintf oc "ha16(%a)" symbol_offset (s, camlint_of_coqint n)
      | Linux|Diab ->
          fprintf oc "(%a)@ha" symbol_offset (s, camlint_of_coqint n)
      end
  | Csymbol_sda(s, n) ->
      begin match target with
      | MacOS ->
          assert false
      | Linux ->
          fprintf oc "(%a)@sda21" symbol_offset (s, camlint_of_coqint n)
      | Diab ->
          fprintf oc "(%a)@sdarx" symbol_offset (s, camlint_of_coqint n)
      end

(* Numeric index (0 to 3) of a condition-register bit. *)
let num_crbit = function
  | CRbit_0 -> 0
  | CRbit_1 -> 1
  | CRbit_2 -> 2
  | CRbit_3 -> 3

(* Print a condition-register bit as its numeric index. *)
let crbit oc bit =
  fprintf oc "%d" (num_crbit bit)

(* Register number, as a string, of an integer register. *)
let int_reg_name = function
  | GPR0 -> "0" | GPR1 -> "1" | GPR2 -> "2" | GPR3 -> "3"
  | GPR4 -> "4" | GPR5 -> "5" | GPR6 -> "6" | GPR7 -> "7"
  | GPR8 -> "8" | GPR9 -> "9" | GPR10 -> "10" | GPR11 -> "11"
  | GPR12 -> "12" | GPR13 -> "13" | GPR14 -> "14" | GPR15 -> "15"
  | GPR16 -> "16" | GPR17 -> "17" | GPR18 -> "18" | GPR19 -> "19"
  | GPR20 -> "20" | GPR21 -> "21" | GPR22 -> "22" | GPR23 -> "23"
  | GPR24 -> "24" | GPR25 -> "25" | GPR26 -> "26" | GPR27 -> "27"
  | GPR28 -> "28" | GPR29 -> "29" | GPR30 -> "30" | GPR31 -> "31"

(* Register number, as a string, of a floating-point register. *)
let float_reg_name = function
  | FPR0 -> "0" | FPR1 -> "1" | FPR2 -> "2" | FPR3 -> "3"
  | FPR4 -> "4" | FPR5 -> "5" | FPR6 -> "6" | FPR7 -> "7"
  | FPR8 -> "8" | FPR9 -> "9" | FPR10 -> "10" | FPR11 -> "11"
  | FPR12 -> "12" | FPR13 -> "13" | FPR14 -> "14" | FPR15 -> "15"
  | FPR16 -> "16" | FPR17 -> "17" | FPR18 -> "18" | FPR19 -> "19"
  | FPR20 -> "20" | FPR21 -> "21" | FPR22 -> "22" | FPR23 -> "23"
  | FPR24 -> "24" | FPR25 -> "25" | FPR26 -> "26" | FPR27 -> "27"
  | FPR28 -> "28" | FPR29 -> "29" | FPR30 -> "30" | FPR31 -> "31"

(* Print an integer register: prefixed with [r] on MacOS and Diab, as a
   bare number on Linux. *)
let ireg oc r =
  begin match target with
  | MacOS|Diab -> output_char oc 'r'
  | Linux -> ()
  end;
  output_string oc (int_reg_name r)

(* Like [ireg], except that GPR0 is printed as the literal 0 on every
   target. *)
let ireg_or_zero oc r =
  if r = GPR0 then output_string oc "0" else ireg oc r

(* Print a floating-point register: prefixed with [f] on MacOS and Diab,
   as a bare number on Linux. *)
let freg oc r =
  begin match target with
  | MacOS|Diab -> output_char oc 'f'
  | Linux -> ()
  end;
  output_string oc (float_reg_name r)

(* Print condition-register field number [r]: prefixed with [cr] on
   MacOS and Diab, as a bare number on Linux. *)
let creg oc r =
  match target with
  | MacOS|Diab -> fprintf oc "cr%d" r
  | Linux -> fprintf oc "%d" r

(* Print an integer or floating-point register; any other kind of
   register trips an assertion. *)
let preg oc = function
  | IR r -> ireg oc r
  | FR r -> freg oc r
  | _ -> assert false

(* Names of sections *)

(* Section directive text for MacOS. *)
let name_of_section_MacOS = function
  | Section_text -> ".text"
  | Section_data _ -> ".data"
  | Section_small_data _ -> ".data"
  | Section_const -> ".const"
  | Section_small_const -> ".const"
  | Section_string -> ".const"
  | Section_literal -> ".literal8"
  | Section_jumptable -> ".const"
  | Section_user(s, wr, ex) ->
      sprintf ".section %s, %s, %s"
              (if wr then "__DATA" else "__TEXT")
              s
              (if ex then "regular, pure_instructions" else "regular")

(* Section directive text for Linux.  The two data sections yield the
   marker string COMM when their boolean argument is false; [section]
   refuses to print that marker. *)
let name_of_section_Linux = function
  | Section_text -> ".text"
  | Section_data i -> if i then ".data" else "COMM"
  | Section_small_data i -> if i then ".sdata" else "COMM"
  | Section_const -> ".rodata"
  | Section_small_const -> ".sdata2"
  | Section_string -> ".rodata"
  | Section_literal -> ".section .rodata.cst8,\"aM\",@progbits,8"
  | Section_jumptable -> ".text"
  | Section_user(s, wr, ex) ->
      sprintf ".section %s,\"a%s%s\",@progbits"
              s (if wr then "w" else "") (if ex then "x" else "")

(* Section directive text for Diab. *)
let name_of_section_Diab = function
  | Section_text -> ".text"
  | Section_data i -> if i then ".data" else ".bss"
  | Section_small_data i -> if i then ".sdata" else ".sbss"
  | Section_const -> ".text"
  | Section_small_const -> ".sdata2"
  | Section_string -> ".text"
  | Section_literal -> ".text"
  | Section_jumptable -> ".text"
  | Section_user(s, wr, ex) ->
      sprintf ".section %s,,%c"
              s
              (match wr, ex with
               | true, true -> 'm' (* text+data *)
               | true, false -> 'd' (* data *)
               | false, true -> 'c' (* text *)
               | false, false -> 'r') (* const *)

(* Section-naming function of the current target. *)
let name_of_section =
  match target with
  | MacOS -> name_of_section_MacOS
  | Linux -> name_of_section_Linux
  | Diab -> name_of_section_Diab

(* Print the directive that switches to section [sec].  The COMM marker
   produced by [name_of_section_Linux] must never reach this point. *)
let section oc sec =
  let name = name_of_section sec in
  assert (name <> "COMM");
  fprintf oc " %s\n" name

(* Encoding masks for rlwinm instructions *)

(* Compute the pair of bit positions describing mask [n].  The loop
   examines the bits from the most significant one (mx = 0) down to the
   least significant one (mx = 31).  [last] starts out as the value of
   the least significant bit, so a difference between that bit and the
   most significant bit is counted as a transition at position 0.  The
   assertion accepts only masks with exactly two transitions, or with
   none at all when the bits are set. *)
let rolm_mask n =
  let mb = ref 0 (* location of last 0->1 transition *)
  and me = ref 32 (* location of last 1->0 transition *)
  and last = ref ((Int32.logand n 1l) <> 0l) (* last bit seen *)
  and count = ref 0 (* number of transitions *)
  and mask = ref 0x8000_0000l in
  for mx = 0 to 31 do
    if Int32.logand n !mask <> 0l then
      if !last then () else (incr count; mb := mx; last := true)
    else
      if !last then (incr count; me := mx; last := false) else ();
    mask := Int32.shift_right_logical !mask 1
  done;
  (* A 1->0 transition found at position 0 is reported as position 32. *)
  if !me = 0 then me := 32;
  assert (!count = 2 || (!count = 0 && !last));
  (!mb, !me-1)

(* Base-2 log of a Caml integer *)

(* The argument must be strictly positive (checked by assertion at every
   recursion step); the result is rounded down. *)
let rec log2 n =
  assert (n > 0);
  if n = 1 then 0 else 1 + log2 (n lsr 1)

(* Built-ins. They come in three flavors:
   - annotation statements: take their arguments in registers or stack
     locations; generate no code;
   - inlined by the compiler: take their arguments in arbitrary
     registers; preserve all registers except the temporaries
     (GPR0, GPR11, GPR12, FPR0, FPR12, FPR13);
   - inlined while printing asm code; take their arguments in
     locations dictated by the calling conventions; preserve
     callee-save regs only.
*)

(* Handling of annotations *)

(* Matches either an escaped percent sign or a positional parameter
   reference (a percent sign followed by a number that does not start
   with 0) inside an annotation text. *)
let re_annot_param = Str.regexp "%%\\|%[1-9][0-9]*"

(* [print_annot_text print_arg oc txt args] prints annotation text [txt]
   on [oc] as a single assembly comment line.  Literal text is copied, a
   doubled percent sign prints one percent sign, and a reference to
   parameter number n is replaced by the n-th element of [args] (counting
   from 1) printed with [print_arg].  A reference that cannot be resolved
   (no such argument, or a number too large for [int_of_string]) is
   replaced by a visible diagnostic naming the offending reference. *)
let print_annot_text print_arg oc txt args =
  fprintf oc "%s annotation: " comment;
  let print_fragment = function
    | Str.Text s ->
        output_string oc s
    | Str.Delim "%%" ->
        output_char oc '%'
    | Str.Delim s ->
        (* Both [int_of_string] and [List.nth] signal failure with
           [Failure]; resolve the argument first so that only the lookup
           is protected by the handler. *)
        let arg =
          try
            let n = int_of_string (String.sub s 1 (String.length s - 1)) in
            Some (List.nth args (n-1))
          with Failure _ -> None in
        begin match arg with
        | Some a -> print_arg oc a
        | None -> fprintf oc "<bad parameter %s>" s
        end in
  List.iter print_fragment (Str.full_split re_annot_param txt);
  fprintf oc "\n"

(* Print an annotation statement.  Register parameters are printed with
   [preg]; stack parameters are printed as a [mem(R1 + offset, size)]
   expression where the size comes from [size_chunk]. *)
let print_annot_stmt oc txt args =
  let print_annot_param oc = function
    | APreg r -> preg oc r
    | APstack(chunk, ofs) ->
        fprintf oc "mem(R1 + %a, %a)" coqint ofs coqint (size_chunk chunk)
  in
  print_annot_text print_annot_param oc txt args

(* Print a value annotation: the annotation comment, then a register move
   from the first argument to the result when the two registers differ.
   The first argument and the result must both be integer registers or
   both be float registers; anything else trips an assertion. *)
let print_annot_val oc txt args res =
  print_annot_text preg oc txt args;
  match args, res with
  | IR src :: _, IR dst ->
      if dst <> src then fprintf oc " mr %a, %a\n" ireg dst ireg src
  | FR src :: _, FR dst ->
      if dst <> src then fprintf oc " fmr %a, %a\n" freg dst freg src
  | _, _ -> assert false

(* Handling of memcpy *)

(* On the PowerPC, unaligned accesses to 16- and 32-bit integers are
   fast, but unaligned accesses to 64-bit floats can be slow
   (not so much on G5, but clearly so on Power7).
   So, use 64-bit accesses only if alignment >= 4.
   Note that lfd and stfd cannot trap on ill-formed floats.
*)

(* Copy [sz] bytes from the address in [src] to the address in [dst] with
   a fully unrolled sequence of load/store pairs.  8-byte pairs through
   FPR0 are used only when the alignment [al] is at least 4; the rest is
   copied through GPR0 in 4-, 2- and 1-byte pieces. *)
let print_builtin_memcpy_small oc sz al src dst =
  let rec copy ofs sz =
    if sz >= 8 && al >= 4 then begin
      fprintf oc " lfd %a, %d(%a)\n" freg FPR0 ofs ireg src;
      fprintf oc " stfd %a, %d(%a)\n" freg FPR0 ofs ireg dst;
      copy (ofs + 8) (sz - 8)
    end else if sz >= 4 then begin
      fprintf oc " lwz %a, %d(%a)\n" ireg GPR0 ofs ireg src;
      fprintf oc " stw %a, %d(%a)\n" ireg GPR0 ofs ireg dst;
      copy (ofs + 4) (sz - 4)
    end else if sz >= 2 then begin
      fprintf oc " lhz %a, %d(%a)\n" ireg GPR0 ofs ireg src;
      fprintf oc " sth %a, %d(%a)\n" ireg GPR0 ofs ireg dst;
      copy (ofs + 2) (sz - 2)
    end else if sz >= 1 then begin
      fprintf oc " lbz %a, %d(%a)\n" ireg GPR0 ofs ireg src;
      fprintf oc " stb %a, %d(%a)\n" ireg GPR0 ofs ireg dst;
      copy (ofs + 1) (sz - 1)
    end in
  copy 0 sz

(* Copy [sz] bytes (at least 4) with a counted loop that moves one word
   per iteration, followed by at most two load/store pairs for the
   remaining 1 to 3 bytes.  GPR11 and GPR12 serve as the running source
   and destination pointers; they are assigned so that the pointer set up
   first never overwrites [dst] before it is read.
   NOTE(review): the word count is loaded with a single [li]; presumably
   callers never pass sizes whose word count exceeds the immediate range
   of that instruction -- confirm. *)
let print_builtin_memcpy_big oc sz al src dst =
  assert (sz >= 4);
  fprintf oc " li %a, %d\n" ireg GPR0 (sz / 4);
  fprintf oc " mtctr %a\n" ireg GPR0;
  let (s,d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in
  fprintf oc " addi %a, %a, -4\n" ireg s ireg src;
  fprintf oc " addi %a, %a, -4\n" ireg d ireg dst;
  let lbl = new_label() in
  fprintf oc "%a: lwzu %a, 4(%a)\n" label lbl ireg GPR0 ireg s;
  fprintf oc " stwu %a, 4(%a)\n" ireg GPR0 ireg d;
  fprintf oc " bdnz %a\n" label lbl;
  (* s and d lag behind by 4 bytes *)
  match sz land 3 with
  | 1 -> fprintf oc " lbz %a, 4(%a)\n" ireg GPR0 ireg s;
         fprintf oc " stb %a, 4(%a)\n" ireg GPR0 ireg d
  | 2 -> fprintf oc " lhz %a, 4(%a)\n" ireg GPR0 ireg s;
         fprintf oc " sth %a, 4(%a)\n" ireg GPR0 ireg d
  | 3 -> fprintf oc " lhz %a, 4(%a)\n" ireg GPR0 ireg s;
         fprintf oc " sth %a, 4(%a)\n" ireg GPR0 ireg d;
         fprintf oc " lbz %a, 6(%a)\n" ireg GPR0 ireg s;
         fprintf oc " stb %a, 6(%a)\n" ireg GPR0 ireg d
  | _ -> ()

(* Print an inlined memcpy between comment markers.  [args] must be the
   destination and source address registers, in that order.  Sizes up to
   64 bytes are unrolled; larger sizes use the loop. *)
let print_builtin_memcpy oc sz al args =
  let (dst, src) =
    match args with [IR d; IR s] -> (d, s) | _ -> assert false in
  fprintf oc "%s begin builtin __builtin_memcpy_aligned, size = %d, alignment = %d\n"
          comment sz al;
  if sz <= 64
  then print_builtin_memcpy_small oc sz al src dst
  else print_builtin_memcpy_big oc sz al src dst;
  fprintf oc "%s end builtin __builtin_memcpy_aligned\n" comment

(* Handling of volatile reads and writes *)

(* Print the load of a [chunk] from [offset(base)] into [res].  A signed
   byte is loaded zero-extended and then sign-extended with [extsb].  The
   kind of the result register must agree with the chunk. *)
let print_builtin_vload_common oc chunk base offset res =
  match chunk, res with
  | Mint8unsigned, IR res ->
      fprintf oc " lbz %a, %a(%a)\n" ireg res constant offset ireg base
  | Mint8signed, IR res ->
      fprintf oc " lbz %a, %a(%a)\n" ireg res constant offset ireg base;
      fprintf oc " extsb %a, %a\n" ireg res ireg res
  | Mint16unsigned, IR res ->
      fprintf oc " lhz %a, %a(%a)\n" ireg res constant offset ireg base
  | Mint16signed, IR res ->
      fprintf oc " lha %a, %a(%a)\n" ireg res constant offset ireg base
  | Mint32, IR res ->
      fprintf oc " lwz %a, %a(%a)\n" ireg res constant offset ireg base
  | Mfloat32, FR res ->
      fprintf oc " lfs %a, %a(%a)\n" freg res constant offset ireg base
  | Mfloat64, FR res ->
      fprintf oc " lfd %a, %a(%a)\n" freg res constant offset ireg base
  | _ ->
      assert false

(* Volatile read through the address held in the single register
   argument, at offset zero. *)
let print_builtin_vload oc chunk args res =
  fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
  begin match args with
  | [IR addr] ->
      print_builtin_vload_common oc chunk addr (Cint Integers.Int.zero) res
  | _ ->
      assert false
  end;
  fprintf oc "%s end builtin __builtin_volatile_read\n" comment

(* Volatile read of global [id] + [ofs]: the high half of the address is
   materialised in GPR11 and the low half is used as the displacement of
   the load. *)
let print_builtin_vload_global oc chunk id ofs args res =
  fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
  fprintf oc " addis %a, %a, %a\n"
             ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
  print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res;
  fprintf oc "%s end builtin __builtin_volatile_read\n" comment

(* Print the store of [src] as a [chunk] at [offset(base)].  A
   single-precision float is first rounded into FPR13 with [frsp] and
   stored from there.  The kind of the source register must agree with
   the chunk. *)
let print_builtin_vstore_common oc chunk base offset src =
  match chunk, src with
  | (Mint8signed | Mint8unsigned), IR src ->
      fprintf oc " stb %a, %a(%a)\n" ireg src constant offset ireg base
  | (Mint16signed | Mint16unsigned), IR src ->
      fprintf oc " sth %a, %a(%a)\n" ireg src constant offset ireg base
  | Mint32, IR src ->
      fprintf oc " stw %a, %a(%a)\n" ireg src constant offset ireg base
  | Mfloat32, FR src ->
      fprintf oc " frsp %a, %a\n" freg FPR13 freg src;
      fprintf oc " stfs %a, %a(%a)\n" freg FPR13 constant offset ireg base
  | Mfloat64, FR src ->
      fprintf oc " stfd %a, %a(%a)\n" freg src constant offset ireg base
  | _ ->
      assert false

(* Volatile write through the address held in the first argument, at
   offset zero; the second argument is the value to store. *)
let print_builtin_vstore oc chunk args =
  fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
  begin match args with
  | [IR addr; src] ->
      print_builtin_vstore_common oc chunk addr (Cint Integers.Int.zero) src
  | _ ->
      assert false
  end;
  fprintf oc "%s end builtin __builtin_volatile_write\n" comment

(* Volatile write to global [id] + [ofs].  The high half of the address
   goes into GPR11, or into GPR12 when the value to store already lives
   in GPR11. *)
let print_builtin_vstore_global oc chunk id ofs args =
  fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
  begin match args with
  | [src] ->
      let tmp = if src = IR GPR11 then GPR12 else GPR11 in
      fprintf oc " addis %a, %a, %a\n"
                 ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
      print_builtin_vstore_common oc chunk tmp (Csymbol_low(id, ofs)) src
  | _ ->
      assert false
  end;
  fprintf oc "%s end builtin __builtin_volatile_write\n" comment

(* Handling of compiler-inlined builtins *)

(* Print the code of the builtin called [name], between comment markers.
   The builtin is selected by its name together with the shape of its
   arguments and result; any other combination raises
   [Invalid_argument].  Every format string below is kept on one physical
   line, so that each instruction is emitted on a line of its own. *)
let print_builtin_inline oc name args res =
  fprintf oc "%s begin builtin %s\n" comment name;
  (* Can use as temporaries: GPR0, GPR11, GPR12, FPR0, FPR12, FPR13 *)
  begin match name, args, res with
  (* Integer arithmetic *)
  | "__builtin_mulhw", [IR a1; IR a2], IR res ->
      fprintf oc " mulhw %a, %a, %a\n" ireg res ireg a1 ireg a2
  | "__builtin_mulhwu", [IR a1; IR a2], IR res ->
      fprintf oc " mulhwu %a, %a, %a\n" ireg res ireg a1 ireg a2
  | "__builtin_cntlz", [IR a1], IR res ->
      fprintf oc " cntlzw %a, %a\n" ireg res ireg a1
  | "__builtin_bswap", [IR a1], IR res ->
      (* Store the word in a temporary 8-byte stack slot, reload it
         byte-reversed, then release the slot. *)
      fprintf oc " stwu %a, -8(%a)\n" ireg a1 ireg GPR1;
      fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg GPR1;
      fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1
  (* Float arithmetic *)
  | "__builtin_fmadd", [FR a1; FR a2; FR a3], FR res ->
      fprintf oc " fmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
  | "__builtin_fmsub", [FR a1; FR a2; FR a3], FR res ->
      fprintf oc " fmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
  | "__builtin_fnmadd", [FR a1; FR a2; FR a3], FR res ->
      fprintf oc " fnmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
  | "__builtin_fnmsub", [FR a1; FR a2; FR a3], FR res ->
      fprintf oc " fnmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
  | "__builtin_fabs", [FR a1], FR res ->
      fprintf oc " fabs %a, %a\n" freg res freg a1
  | "__builtin_fsqrt", [FR a1], FR res ->
      fprintf oc " fsqrt %a, %a\n" freg res freg a1
  | "__builtin_frsqrte", [FR a1], FR res ->
      fprintf oc " frsqrte %a, %a\n" freg res freg a1
  | "__builtin_fres", [FR a1], FR res ->
      fprintf oc " fres %a, %a\n" freg res freg a1
  | "__builtin_fsel", [FR a1; FR a2; FR a3], FR res ->
      fprintf oc " fsel %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
  (* Memory accesses *)
  | "__builtin_read16_reversed", [IR a1], IR res ->
      fprintf oc " lhbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1
  | "__builtin_read32_reversed", [IR a1], IR res ->
      fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1
  | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
      fprintf oc " sthbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1
  | "__builtin_write32_reversed", [IR a1; IR a2], _ ->
      fprintf oc " stwbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1
  (* Synchronization *)
  | "__builtin_eieio", [], _ ->
      fprintf oc " eieio\n"
  | "__builtin_sync", [], _ ->
      fprintf oc " sync\n"
  | "__builtin_isync", [], _ ->
      fprintf oc " isync\n"
  | "__builtin_trap", [], _ ->
      fprintf oc " trap\n"
  (* Catch-all *)
  | _ ->
      invalid_arg ("unrecognized builtin " ^ name)
  end;
  fprintf oc "%s end builtin %s\n" comment name

(* Determine if the displacement of a conditional branch fits the short form *)

(* [tbl] maps labels to estimated positions and [pc] is the estimated
   position of the branch; both are counted in instructions.  The
   destination label must be present in [tbl]. *)
let short_cond_branch tbl pc lbl_dest =
  match PTree.get lbl_dest tbl with
  | None -> assert false
  | Some pc_dest ->
      let disp = pc_dest - pc in -0x2000 <= disp && disp < 0x2000

(* Printing of instructions *)

(* Float constants awaiting emission: pairs of a label and the 64-bit
   pattern of the float. *)
let float_literals : (int * int64) list ref = ref []

(* Jump tables awaiting emission: pairs of a label and the list of
   destination labels. *)
let jumptables : (int * label list) list ref = ref []

(* The binding opened by this [let] continues on the next physical line
   of the file. *)
let
print_instruction oc tbl pc = function
  (* Print the assembly text of one instruction on [oc].  [tbl] maps
     labels to estimated positions and [pc] is the estimated position of
     the instruction being printed; both are consulted only to choose
     between the short and long forms of conditional branches.
     Pseudo-instructions expand to several machine instructions, and
     some of them queue a float literal or a jump table for later
     emission.  Every format string is kept on one physical line, so
     that each instruction is emitted on a line of its own. *)
  | Padd(r1, r2, r3) ->
      fprintf oc " add %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Padde(r1, r2, r3) ->
      fprintf oc " adde %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Paddi(r1, r2, c) ->
      fprintf oc " addi %a, %a, %a\n" ireg r1 ireg_or_zero r2 constant c
  | Paddic(r1, r2, c) ->
      fprintf oc " addic %a, %a, %a\n" ireg r1 ireg_or_zero r2 constant c
  | Paddis(r1, r2, c) ->
      fprintf oc " addis %a, %a, %a\n" ireg r1 ireg_or_zero r2 constant c
  | Paddze(r1, r2) ->
      fprintf oc " addze %a, %a\n" ireg r1 ireg r2
  | Pallocframe(sz, ofs) ->
      let sz = camlint_of_coqint sz
      and ofs = camlint_of_coqint ofs in
      assert (ofs = 0l);
      let adj = Int32.neg sz in
      (* A small adjustment goes directly in the displacement of stwu;
         a larger one is first built in GPR12 from its two halves. *)
      if adj >= -0x8000l then
        fprintf oc " stwu %a, %ld(%a)\n" ireg GPR1 adj ireg GPR1
      else begin
        fprintf oc " addis %a, 0, %ld\n"
                   ireg GPR12 (Int32.shift_right_logical adj 16);
        fprintf oc " ori %a, %a, %ld\n"
                   ireg GPR12 ireg GPR12 (Int32.logand adj 0xFFFFl);
        fprintf oc " stwux %a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR12
      end
  | Pand_(r1, r2, r3) ->
      fprintf oc " and. %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pandc(r1, r2, r3) ->
      fprintf oc " andc %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pandi_(r1, r2, c) ->
      fprintf oc " andi. %a, %a, %a\n" ireg r1 ireg r2 constant c
  | Pandis_(r1, r2, c) ->
      fprintf oc " andis. %a, %a, %a\n" ireg r1 ireg r2 constant c
  | Pb lbl ->
      fprintf oc " b %a\n" label (transl_label lbl)
  | Pbctr ->
      fprintf oc " bctr\n"
  | Pbctrl ->
      fprintf oc " bctrl\n"
  | Pbf(bit, lbl) ->
      (* Out of short range: branch on the opposite condition around an
         unconditional branch. *)
      if short_cond_branch tbl pc lbl then
        fprintf oc " bf %a, %a\n" crbit bit label (transl_label lbl)
      else begin
        let next = new_label() in
        fprintf oc " bt %a, %a\n" crbit bit label next;
        fprintf oc " b %a\n" label (transl_label lbl);
        fprintf oc "%a:\n" label next
      end
  | Pbl s ->
      fprintf oc " bl %a\n" symbol s
  | Pbs s ->
      fprintf oc " b %a\n" symbol s
  | Pblr ->
      fprintf oc " blr\n"
  | Pbt(bit, lbl) ->
      (* Same scheme as for Pbf, with the conditions exchanged. *)
      if short_cond_branch tbl pc lbl then
        fprintf oc " bt %a, %a\n" crbit bit label (transl_label lbl)
      else begin
        let next = new_label() in
        fprintf oc " bf %a, %a\n" crbit bit label next;
        fprintf oc " b %a\n" label (transl_label lbl);
        fprintf oc "%a:\n" label next
      end
  | Pbtbl(r, tbl) ->
      (* In this arm [tbl] is the list of destination labels; it hides
         the position table of the same name. *)
      let lbl = new_label() in
      fprintf oc "%s begin pseudoinstr btbl(%a)\n" comment ireg r;
      fprintf oc "%s jumptable [ " comment;
      List.iter (fun l -> fprintf oc "%a " label (transl_label l)) tbl;
      fprintf oc "]\n";
      fprintf oc " addis %a, %a, %a\n" ireg GPR12 ireg r label_high lbl;
      fprintf oc " lwz %a, %a(%a)\n" ireg GPR12 label_low lbl ireg GPR12;
      fprintf oc " mtctr %a\n" ireg GPR12;
      fprintf oc " bctr\n";
      jumptables := (lbl, tbl) :: !jumptables;
      fprintf oc "%s end pseudoinstr btbl\n" comment
  | Pcmplw(r1, r2) ->
      fprintf oc " cmplw %a, %a, %a\n" creg 0 ireg r1 ireg r2
  | Pcmplwi(r1, c) ->
      fprintf oc " cmplwi %a, %a, %a\n" creg 0 ireg r1 constant c
  | Pcmpw(r1, r2) ->
      fprintf oc " cmpw %a, %a, %a\n" creg 0 ireg r1 ireg r2
  | Pcmpwi(r1, c) ->
      fprintf oc " cmpwi %a, %a, %a\n" creg 0 ireg r1 constant c
  | Pcror(c1, c2, c3) ->
      fprintf oc " cror %a, %a, %a\n" crbit c1 crbit c2 crbit c3
  | Pdivw(r1, r2, r3) ->
      fprintf oc " divw %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pdivwu(r1, r2, r3) ->
      fprintf oc " divwu %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Peqv(r1, r2, r3) ->
      fprintf oc " eqv %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pextsb(r1, r2) ->
      fprintf oc " extsb %a, %a\n" ireg r1 ireg r2
  | Pextsh(r1, r2) ->
      fprintf oc " extsh %a, %a\n" ireg r1 ireg r2
  | Pfreeframe(sz, ofs) ->
      (* Note: could also do an add on GPR1 using sz *)
      fprintf oc " lwz %a, %ld(%a)\n"
                 ireg GPR1 (camlint_of_coqint ofs) ireg GPR1
  | Pfabs(r1, r2) ->
      fprintf oc " fabs %a, %a\n" freg r1 freg r2
  | Pfadd(r1, r2, r3) ->
      fprintf oc " fadd %a, %a, %a\n" freg r1 freg r2 freg r3
  | Pfcmpu(r1, r2) ->
      fprintf oc " fcmpu %a, %a, %a\n" creg 0 freg r1 freg r2
  | Pfcti(r1, r2) ->
      (* Convert into FPR13, spill it to a temporary 8-byte stack slot,
         reload the word at offset 4 of the slot, release the slot. *)
      fprintf oc "%s begin pseudoinstr %a = fcti(%a)\n"
                 comment ireg r1 freg r2;
      fprintf oc " fctiwz %a, %a\n" freg FPR13 freg r2;
      fprintf oc " stfdu %a, -8(%a)\n" freg FPR13 ireg GPR1;
      fprintf oc " lwz %a, 4(%a)\n" ireg r1 ireg GPR1;
      fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1;
      fprintf oc "%s end pseudoinstr fcti\n" comment
  | Pfdiv(r1, r2, r3) ->
      fprintf oc " fdiv %a, %a, %a\n" freg r1 freg r2 freg r3
  | Pfmake(rd, r1, r2) ->
      (* Store the two words in a temporary 8-byte stack slot and reload
         them as one double. *)
      fprintf oc "%s begin pseudoinstr %a = fmake(%a, %a)\n"
                 comment freg rd ireg r1 ireg r2;
      fprintf oc " stwu %a, -8(%a)\n" ireg r1 ireg GPR1;
      fprintf oc " stw %a, 4(%a)\n" ireg r2 ireg GPR1;
      fprintf oc " lfd %a, 0(%a)\n" freg rd ireg GPR1;
      fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1;
      fprintf oc "%s end pseudoinstr fmake\n" comment
  | Pfmr(r1, r2) ->
      fprintf oc " fmr %a, %a\n" freg r1 freg r2
  | Pfmul(r1, r2, r3) ->
      fprintf oc " fmul %a, %a, %a\n" freg r1 freg r2 freg r3
  | Pfneg(r1, r2) ->
      fprintf oc " fneg %a, %a\n" freg r1 freg r2
  | Pfrsp(r1, r2) ->
      fprintf oc " frsp %a, %a\n" freg r1 freg r2
  | Pfsub(r1, r2, r3) ->
      fprintf oc " fsub %a, %a, %a\n" freg r1 freg r2 freg r3
  | Plbz(r1, c, r2) ->
      fprintf oc " lbz %a, %a(%a)\n" ireg r1 constant c ireg r2
  | Plbzx(r1, r2, r3) ->
      fprintf oc " lbzx %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Plfd(r1, c, r2) ->
      fprintf oc " lfd %a, %a(%a)\n" freg r1 constant c ireg r2
  | Plfdx(r1, r2, r3) ->
      fprintf oc " lfdx %a, %a, %a\n" freg r1 ireg r2 ireg r3
  | Plfi(r1, c) ->
      (* Load the constant from a literal that is queued here and
         emitted later under label [lbl]. *)
      let lbl = new_label() in
      fprintf oc " addis %a, 0, %a\n" ireg GPR12 label_high lbl;
      fprintf oc " lfd %a, %a(%a) %s %.18g\n"
                 freg r1 label_low lbl ireg GPR12 comment c;
      float_literals := (lbl, Int64.bits_of_float c) :: !float_literals
  | Plfs(r1, c, r2) ->
      fprintf oc " lfs %a, %a(%a)\n" freg r1 constant c ireg r2
  | Plfsx(r1, r2, r3) ->
      fprintf oc " lfsx %a, %a, %a\n" freg r1 ireg r2 ireg r3
  | Plha(r1, c, r2) ->
      fprintf oc " lha %a, %a(%a)\n" ireg r1 constant c ireg r2
  | Plhax(r1, r2, r3) ->
      fprintf oc " lhax %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Plhz(r1, c, r2) ->
      fprintf oc " lhz %a, %a(%a)\n" ireg r1 constant c ireg r2
  | Plhzx(r1, r2, r3) ->
      fprintf oc " lhzx %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Plwz(r1, c, r2) ->
      fprintf oc " lwz %a, %a(%a)\n" ireg r1 constant c ireg r2
  | Plwzx(r1, r2, r3) ->
      fprintf oc " lwzx %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pmfcrbit(r1, bit) ->
      fprintf oc " mfcr %a\n" ireg r1;
      fprintf oc " rlwinm %a, %a, %d, 31, 31\n"
                 ireg r1 ireg r1 (1 + num_crbit bit)
  | Pmflr(r1) ->
      fprintf oc " mflr %a\n" ireg r1
  | Pmr(r1, r2) ->
      fprintf oc " mr %a, %a\n" ireg r1 ireg r2
  | Pmtctr(r1) ->
      fprintf oc " mtctr %a\n" ireg r1
  | Pmtlr(r1) ->
      fprintf oc " mtlr %a\n" ireg r1
  | Pmulli(r1, r2, c) ->
      fprintf oc " mulli %a, %a, %a\n" ireg r1 ireg r2 constant c
  | Pmullw(r1, r2, r3) ->
      fprintf oc " mullw %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pnand(r1, r2, r3) ->
      fprintf oc " nand %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pnor(r1, r2, r3) ->
      fprintf oc " nor %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Por(r1, r2, r3) ->
      fprintf oc " or %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Porc(r1, r2, r3) ->
      fprintf oc " orc %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pori(r1, r2, c) ->
      fprintf oc " ori %a, %a, %a\n" ireg r1 ireg r2 constant c
  | Poris(r1, r2, c) ->
      fprintf oc " oris %a, %a, %a\n" ireg r1 ireg r2 constant c
  | Prlwinm(r1, r2, c1, c2) ->
      (* The mask is printed as the pair computed by rolm_mask, with its
         hexadecimal value in a trailing comment. *)
      let (mb, me) = rolm_mask (camlint_of_coqint c2) in
      fprintf oc " rlwinm %a, %a, %ld, %d, %d %s 0x%lx\n"
                 ireg r1 ireg r2 (camlint_of_coqint c1) mb me
                 comment (camlint_of_coqint c2)
  | Prlwimi(r1, r2, c1, c2) ->
      let (mb, me) = rolm_mask (camlint_of_coqint c2) in
      fprintf oc " rlwimi %a, %a, %ld, %d, %d %s 0x%lx\n"
                 ireg r1 ireg r2 (camlint_of_coqint c1) mb me
                 comment (camlint_of_coqint c2)
  | Pslw(r1, r2, r3) ->
      fprintf oc " slw %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Psraw(r1, r2, r3) ->
      fprintf oc " sraw %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Psrawi(r1, r2, c) ->
      fprintf oc " srawi %a, %a, %ld\n" ireg r1 ireg r2 (camlint_of_coqint c)
  | Psrw(r1, r2, r3) ->
      fprintf oc " srw %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pstb(r1, c, r2) ->
      fprintf oc " stb %a, %a(%a)\n" ireg r1 constant c ireg r2
  | Pstbx(r1, r2, r3) ->
      fprintf oc " stbx %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pstfd(r1, c, r2) ->
      fprintf oc " stfd %a, %a(%a)\n" freg r1 constant c ireg r2
  | Pstfdx(r1, r2, r3) ->
      fprintf oc " stfdx %a, %a, %a\n" freg r1 ireg r2 ireg r3
  | Pstfs(r1, c, r2) ->
      (* Round to single precision in FPR13, then store from there. *)
      fprintf oc " frsp %a, %a\n" freg FPR13 freg r1;
      fprintf oc " stfs %a, %a(%a)\n" freg FPR13 constant c ireg r2
  | Pstfsx(r1, r2, r3) ->
      fprintf oc " frsp %a, %a\n" freg FPR13 freg r1;
      fprintf oc " stfsx %a, %a, %a\n" freg FPR13 ireg r2 ireg r3
  | Psth(r1, c, r2) ->
      fprintf oc " sth %a, %a(%a)\n" ireg r1 constant c ireg r2
  | Psthx(r1, r2, r3) ->
      fprintf oc " sthx %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pstw(r1, c, r2) ->
      fprintf oc " stw %a, %a(%a)\n" ireg r1 constant c ireg r2
  | Pstwx(r1, r2, r3) ->
      fprintf oc " stwx %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Psubfc(r1, r2, r3) ->
      fprintf oc " subfc %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Psubfe(r1, r2, r3) ->
      fprintf oc " subfe %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Psubfic(r1, r2, c) ->
      fprintf oc " subfic %a, %a, %a\n" ireg r1 ireg r2 constant c
  | Pxor(r1, r2, r3) ->
      fprintf oc " xor %a, %a, %a\n" ireg r1 ireg r2 ireg r3
  | Pxori(r1, r2, c) ->
      fprintf oc " xori %a, %a, %a\n" ireg r1 ireg r2 constant c
  | Pxoris(r1, r2, c) ->
      fprintf oc " xoris %a, %a, %a\n" ireg r1 ireg r2 constant c
  | Plabel lbl ->
      fprintf oc "%a:\n" label (transl_label lbl)
  | Pbuiltin(ef, args, res) ->
      (* Dispatch to the printer of the builtin; external functions of
         any other kind are not expected here. *)
      begin match ef with
      | EF_builtin(name, sg) ->
          print_builtin_inline oc (extern_atom name) args res
      | EF_vload chunk ->
          print_builtin_vload oc chunk args res
      | EF_vstore chunk ->
          print_builtin_vstore oc chunk args
      | EF_vload_global(chunk, id, ofs) ->
          print_builtin_vload_global oc chunk id ofs args res
      | EF_vstore_global(chunk, id, ofs) ->
          print_builtin_vstore_global oc chunk id ofs args
      | EF_memcpy(sz, al) ->
          print_builtin_memcpy oc (Int32.to_int (camlint_of_coqint sz))
                                  (Int32.to_int (camlint_of_coqint al)) args
      | EF_annot_val(txt, targ) ->
          print_annot_val oc (extern_atom txt) args res
      | _ ->
          assert false
      end
  | Pannot(ef, args) ->
      begin match ef with
      | EF_annot(txt, targs) ->
          print_annot_stmt oc (extern_atom txt) args
      | _ ->
          assert false
      end

(* Estimate the size of an Asm instruction encoding, in number of actual
   PowerPC instructions. We can over-approximate. *)

(* Instructions not listed count for 1; labels and annotations count
   for 0.  Builtins of a kind not listed trip an assertion. *)
let instr_size = function
  | Pallocframe(sz, ofs) -> 3
  | Pbf(bit, lbl) -> 2
  | Pbt(bit, lbl) -> 2
  | Pbtbl(r, tbl) -> 4
  | Pfcti(r1, r2) -> 4
  | Pfmake(rd, r1, r2) -> 4
  | Plfi(r1, c) -> 2
  | Pmfcrbit(r1, bit) -> 2
  | Pstfs(r1, c, r2) -> 2
  | Pstfsx(r1, r2, r3) -> 2
  | Plabel lbl -> 0
  | Pbuiltin(ef, args, res) ->
      begin match ef with
      | EF_builtin(name, sg) ->
          begin match extern_atom name with
          | "__builtin_bswap" -> 3
          | _ -> 1
          end
      | EF_vload chunk ->
          if chunk = Mint8signed then 2 else 1
      | EF_vstore chunk ->
          if chunk = Mfloat32 then 2 else 1
      | EF_vload_global(chunk, id, ofs) ->
          if chunk = Mint8signed then 3 else 2
      | EF_vstore_global(chunk, id, ofs) ->
          if chunk = Mfloat32 then 3 else 2
      | EF_memcpy(sz, al) ->
          let sz = Int32.to_int (camlint_of_coqint sz) in
          if sz <= 64 then (sz / 4) * 2 + 6 else 11
      | EF_annot_val(txt, targ) ->
          0
      | _ ->
          assert false
      end
  | Pannot(ef, args) -> 0
  | _ -> 1

(* Build a table label -> estimated position in generated code.
   Used to predict if relative conditional branches can use the short form.
*)

let label_positions tbl pc code =
  (* One left-to-right pass carrying the table built so far and the
     estimated position of the current instruction.  A label records the
     current position and occupies no space; any other instruction moves
     the position forward by its estimated size. *)
  let visit (positions, here) instr =
    match instr with
    | Plabel lbl -> (PTree.set lbl here positions, here)
    | _ -> (positions, here + instr_size instr) in
  fst (List.fold_left visit (tbl, pc) code)

(* Print every instruction of a sequence in order, giving each one its
   estimated position: [pc] for the first, then the running sum of the
   estimated sizes of the instructions already printed. *)

let print_instructions oc tbl pc code =
  let emit here instr =
    print_instruction oc tbl here instr;
    here + instr_size instr in
  ignore (List.fold_left emit pc code)

(* Printing of a whole function and of its literals and jump tables *)

(* A float literal is printed as its label followed by two 32-bit words,
   the most significant word first. *)
let print_literal oc (lbl, n) =
  let high_word = Int64.to_int32 (Int64.shift_right_logical n 32) in
  let low_word = Int64.to_int32 n in
  fprintf oc "%a: .long 0x%lx, 0x%lx\n" label lbl high_word low_word

(* A jump table is printed as its label followed by one word per
   destination, each holding the translated destination label. *)
let print_jumptable oc (lbl, tbl) =
  fprintf oc "%a:" label lbl;
  let print_entry dest = fprintf oc " .long %a\n" label (transl_label dest) in
  List.iter print_entry tbl

(* Print function [name] with body [code]: reset the per-function state,
   switch to the text section, print the alignment, the optional
   global-visibility directive and the entry label, then the
   instructions.  Targets other than MacOS also get type and size
   directives.  The float literals and jump tables queued while printing
   the instructions are emitted afterwards in their own sections, and
   the queues are emptied. *)
let print_function oc name code =
  Hashtbl.clear current_function_labels;
  float_literals := [];
  jumptables := [];
  (* Use the three sections attached to the function when exactly three
     are given; fall back on the default ones otherwise. *)
  let (text, lit, jmptbl) =
    match C2C.atom_sections name with
    | [for_text; for_literals; for_tables] ->
        (for_text, for_literals, for_tables)
    | _ ->
        (Section_text, Section_literal, Section_jumptable) in
  section oc text;
  fprintf oc " .align 2\n";
  if C2C.atom_is_static name then ()
  else fprintf oc " .globl %a\n" symbol name;
  fprintf oc "%a:\n" symbol name;
  let positions = label_positions PTree.empty 0 code in
  print_instructions oc positions 0 code;
  begin match target with
  | MacOS -> ()
  | Linux | Diab ->
      fprintf oc " .type %a, @function\n" symbol name;
      fprintf oc " .size %a, . - %a\n" symbol name symbol name
  end;
  begin match !float_literals with
  | [] -> ()
  | pending ->
      section oc lit;
      fprintf oc " .align 3\n";
      List.iter (print_literal oc) pending;
      float_literals := []
  end;
  begin match !jumptables with
  | [] -> ()
  | pending ->
      section oc jmptbl;
      fprintf oc " .align 2\n";
      List.iter (print_jumptable oc) pending;
      jumptables := []
  end

(* Generation of stub functions *)

(* Recognises names made of a prefix (captured as group 1), a dollar
   sign, and a possibly empty run of the letters i and f. *)
let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$"

(* Stubs for MacOS X *)

module Stubs_MacOS = struct

  (* Generation of stub code for variadic functions, e.g. printf.
Calling conventions for variadic functions are:
     - always reserve 8 stack words (offsets 24 to 52) so that the
       variadic function can save there the integer registers parameters
       r3 ... r10
     - treat float arguments as pairs of integers, i.e. if we
       must pass them in registers, use a pair of integer registers
       for this purpose.
     The code we generate is:
     - allocate large enough stack frame
     - save return address
     - copy our arguments (registers and stack) to the stack frame,
       starting at offset 24
     - load relevant integer parameter registers r3...r10 from the
       stack frame, limited by the actual number of arguments
     - call the variadic thing
     - deallocate stack frame and return
  *)

  (* [variadic_stub oc stub_name fun_name ty_args] prints the body of the
     stub.  [stub_name] names the function-pointer cell of the stub,
     [fun_name] is the symbol that cell refers to, and [ty_args] lists
     the types of the arguments. *)
  let variadic_stub oc stub_name fun_name ty_args =
    (* Compute total size of arguments *)
    let arg_size =
      List.fold_left
        (fun sz ty -> match ty with Tint -> sz + 4 | Tfloat -> sz + 8)
        0 ty_args in
    (* Stack size is linkage area + argument size, with a minimum of 56 bytes *)
    let frame_size = max 56 (24 + arg_size) in
    fprintf oc " mflr r0\n";
    fprintf oc " stwu r1, %d(r1)\n" (-frame_size);
    fprintf oc " stw r0, %d(r1)\n" (frame_size + 4);
    (* Copy our parameters to our stack frame.
       As an optimization, don't copy parameters that are already in
       integer registers, since these stay in place. *)
    (* [gpr] and [fpr] are the numbers of the next integer and float
       registers; [src_ofs] and [dst_ofs] are offsets from r1 of the
       incoming argument and of its slot in the new frame. *)
    let rec copy gpr fpr src_ofs dst_ofs = function
      | [] -> ()
      | Tint :: rem ->
          if gpr > 10 then begin
            fprintf oc " lwz r0, %d(r1)\n" src_ofs;
            fprintf oc " stw r0, %d(r1)\n" dst_ofs
          end;
          copy (gpr + 1) fpr (src_ofs + 4) (dst_ofs + 4) rem
      | Tfloat :: rem ->
          if fpr <= 10 then begin
            fprintf oc " stfd f%d, %d(r1)\n" fpr dst_ofs
          end else begin
            fprintf oc " lfd f0, %d(r1)\n" src_ofs;
            fprintf oc " stfd f0, %d(r1)\n" dst_ofs
          end;
          copy (gpr + 2) (fpr + 1) (src_ofs + 8) (dst_ofs + 8) rem
    in copy 3 1 (frame_size + 24) 24 ty_args;
    (* Load the first parameters into integer registers.
       As an optimization, don't load parameters that are already
       in the correct integer registers. *)
    (* A float occupies two consecutive integer registers; each half is
       loaded only while its register number does not exceed 10. *)
    let rec load gpr ofs = function
      | [] -> ()
      | Tint :: rem ->
          load (gpr + 1) (ofs + 4) rem
      | Tfloat :: rem ->
          if gpr <= 10 then
            fprintf oc " lwz r%d, %d(r1)\n" gpr ofs;
          if gpr + 1 <= 10 then
            fprintf oc " lwz r%d, %d(r1)\n" (gpr + 1) (ofs + 4);
          load (gpr + 2) (ofs + 8) rem
    in load 3 24 ty_args;
    (* Call the function *)
    fprintf oc " addis r11, 0, ha16(L%s$ptr)\n" stub_name;
    fprintf oc " lwz r11, lo16(L%s$ptr)(r11)\n" stub_name;
    fprintf oc " mtctr r11\n";
    fprintf oc " bctrl\n";
    (* Free our frame and return *)
    fprintf oc " lwz r0, %d(r1)\n" (frame_size + 4);
    fprintf oc " mtlr r0\n";
    fprintf oc " addi r1, r1, %d\n" frame_size;
    fprintf oc " blr\n";
    (* The function pointer *)
    fprintf oc " .non_lazy_symbol_pointer\n";
    fprintf oc "L%s$ptr:\n" stub_name;
    fprintf oc " .indirect_symbol _%s\n" fun_name;
    fprintf oc " .long 0\n"

  (* Stubs for fixed-type functions are much simpler *)

  (* Jump to the function through its pointer cell, without touching the
     stack or the argument registers. *)
  let non_variadic_stub oc name =
    fprintf oc " addis r11, 0, ha16(L%s$ptr)\n" name;
    fprintf oc " lwz r11, lo16(L%s$ptr)(r11)\n" name;
    fprintf oc " mtctr r11\n";
    fprintf oc " bctr\n";
    fprintf oc " .non_lazy_symbol_pointer\n";
    fprintf oc "L%s$ptr:\n" name;
    fprintf oc " .indirect_symbol _%s\n" name;
    fprintf oc " .long 0\n"

  (* Print the stub of function [name] in the text section.  Names that
     match [re_variadic_stub] get the variadic stub, calling the function
     named by the captured prefix; all others get the simple stub. *)
  let stub_function oc name sg =
    let name = extern_atom name in
    section oc Section_text;
    fprintf oc " .align 2\n";
    fprintf oc "L%s$stub:\n" name;
    if Str.string_match re_variadic_stub name 0
    then variadic_stub oc name (Str.matched_group 1 name) sg.sig_args
    else non_variadic_stub oc name

  (* On this target every function considered gets a stub. *)
  let function_needs_stub name = true

end

(* Stubs for EABI *)

module Stubs_EABI = struct

  (* Print the stub of a variadic function: set or clear condition
     register bit 6 according to the argument types, then branch to the
     function proper. *)
  let variadic_stub oc stub_name fun_name args =
    section oc Section_text;
    fprintf oc " .align 2\n";
    fprintf oc ".L%s$stub:\n" stub_name;
    (* bit 6 must be set if at least one argument is a float; clear otherwise *)
    if List.mem Tfloat args
    then fprintf oc " creqv 6, 6, 6\n"
    else fprintf oc " crxor 6, 6, 6\n";
    fprintf oc " b %s\n" fun_name

  (* Print the stub of function [name], if it needs one. *)
  let stub_function oc name sg =
    let name = extern_atom name in
    (* Only variadic functions need a stub *)
    if Str.string_match re_variadic_stub name 0
    then variadic_stub oc name (Str.matched_group 1 name) sg.sig_args

  (* A function needs a stub exactly when its name matches
     [re_variadic_stub]. *)
  let function_needs_stub name =
    Str.string_match re_variadic_stub (extern_atom name) 0

end

(* Stub policy of the current target. *)
let function_needs_stub =
  match target with
  | MacOS -> Stubs_MacOS.function_needs_stub
  | Linux|Diab -> Stubs_EABI.function_needs_stub

(* Stub printer of the current target. *)
let stub_function =
  match target with
  | MacOS -> Stubs_MacOS.stub_function
  | Linux|Diab -> Stubs_EABI.stub_function

(* Print a function definition: the code of an internal function, or the
   stub of an external function of kind EF_external, EF_malloc or EF_free
   when the target requires one.  Other external functions print
   nothing. *)
let print_fundef oc (name, defn) =
  match defn with
  | Internal code ->
      print_function oc name code
  | External ((EF_external _ | EF_malloc | EF_free) as ef) ->
      if function_needs_stub name then stub_function oc name (ef_sig ef)
  | External _ ->
      ()

(* Add to [stubbed_functions] the external functions for which
   [print_fundef] prints a stub. *)
let record_extfun (name, defn) =
  match defn with
  | Internal _ -> ()
  | External (EF_external _ | EF_malloc | EF_free) ->
      if function_needs_stub name then
        stubbed_functions := IdentSet.add name !stubbed_functions
  | External _ -> ()

(* Print one item of initialization data.  Floats are printed as the
   integer value of their bit pattern (two words, most significant first,
   for 64-bit floats) with the float value in a trailing comment.  A
   space of nonpositive size prints nothing. *)
let print_init oc = function
  | Init_int8 n ->
      fprintf oc " .byte %ld\n" (camlint_of_coqint n)
  | Init_int16 n ->
      fprintf oc " .short %ld\n" (camlint_of_coqint n)
  | Init_int32 n ->
      fprintf oc " .long %ld\n" (camlint_of_coqint n)
  | Init_float32 n ->
      fprintf oc " .long %ld %s %.18g\n" (Int32.bits_of_float n) comment n
  | Init_float64 n ->
      let b = Int64.bits_of_float n in
      fprintf oc " .long %Ld, %Ld %s %.18g\n"
                 (Int64.shift_right_logical b 32)
                 (Int64.logand b 0xFFFFFFFFL)
                 comment n
  | Init_space n ->
      let n = camlint_of_z n in
      if n > 0l then fprintf oc " .space %ld\n" n
  | Init_addrof(symb, ofs) ->
      fprintf oc " .long %a\n" symbol_offset (symb, camlint_of_coqint ofs)

(* Print the initialization data [id] of variable [name].  Data made only
   of bytes, for a name matching [PrintCsyntax.re_string_literal], is
   printed as a single string directive; anything else is printed item by
   item. *)
let print_init_data oc name id =
  if Str.string_match PrintCsyntax.re_string_literal (extern_atom name) 0
  && List.for_all (function Init_int8 _ -> true | _ -> false) id
  then
    fprintf oc " .ascii \"%s\"\n" (PrintCsyntax.string_of_init id)
  else
    List.iter (print_init oc) id

(* A variable whose initializer list is empty prints nothing.  The rest
   of this definition lies beyond the last line reproduced here. *)
let print_var oc (name, v) =
  match v.gvar_init with
  | [] -> ()
  | _ ->
      let sec =
        match C2C.atom_sections name with
        | [s] -> s
| _ -> Section_data true and align = match C2C.atom_alignof name with | Some a -> log2 a | None -> 3 in (* 8-alignment is a safe default *) let name_sec = name_of_section sec in if name_sec <> "COMM" then begin fprintf oc " %s\n" name_sec; fprintf oc " .align %d\n" align; if not (C2C.atom_is_static name) then fprintf oc " .globl %a\n" symbol name; fprintf oc "%a:\n" symbol name; print_init_data oc name v.gvar_init; if target <> MacOS then begin fprintf oc " .type %a, @object\n" symbol name; fprintf oc " .size %a, . - %a\n" symbol name symbol name end end else begin let sz = match v.gvar_init with [Init_space sz] -> sz | _ -> assert false in fprintf oc " %s %a, %ld, %d\n" (if C2C.atom_is_static name then ".lcomm" else ".comm") symbol name (camlint_of_coqint sz) (1 lsl align) end let print_program oc p = stubbed_functions := IdentSet.empty; List.iter record_extfun p.prog_funct; List.iter (print_var oc) p.prog_vars; List.iter (print_fundef oc) p.prog_funct